# Load the monthly accident-frequency data and the monthly special-days data
# produced by earlier preprocessing (objects used below: Total_Dataset_Freq_M
# and Dias_Especiales_Mensuales — names inferred from later usage).
load("../data/Frecuencia_De_Accidentes_Mensual.Rda")
load("../data/Dias_Especiales_Mensuales.Rda")
Se crean las columnas de accidentes graves y leves para saber la frecuencia por mes
library(reshape)
##
## Attaching package: 'reshape'
## The following object is masked from 'package:dplyr':
##
## rename
## The following objects are masked from 'package:tidyr':
##
## expand, smiths
# Reshape to wide format: one row per ANO+MES, one column per GRAVEDAD level
# (ACCIDENTES_GRAVES / ACCIDENTES_LEVES). cast() auto-selects FREQ as the
# value column, as the echoed message below confirms.
Total_Dataset_Freq_M <- cast(Total_Dataset_Freq_M[,c(1,3,4,5)],ANO+MES~GRAVEDAD)
## Using FREQ as value column. Use the value argument to cast to override this choice
Se agrega la columna TOTAL_ACCIDENTES
# Add TOTAL_ACCIDENTES as the sum of serious and minor accidents per month.
Total_Dataset_Freq_M$TOTAL_ACCIDENTES <- Total_Dataset_Freq_M$ACCIDENTES_GRAVES + Total_Dataset_Freq_M$ACCIDENTES_LEVES
# Fix: sqldf was first attached only further down in the script; load it
# before its first use so the script runs top-to-bottom. Also "SELECT*"
# is spaced to standard "SELECT *".
library(sqldf)
# Left-join the special-days information onto the monthly frequencies,
# matching on year and month.
Total_Dataset_Freq_M <- sqldf("SELECT *
FROM Total_Dataset_Freq_M FS
LEFT JOIN Dias_Especiales_Mensuales DES
ON (FS.ANO = DES.ANO AND FS.MES = DES.MES)")
# Fix: unite_() is the deprecated standard-evaluation variant and lives in
# tidyr, not dplyr (only dplyr was attached here). Attach tidyr and use
# unite(), which accepts the quoted column names directly.
library(tidyr)
library(dplyr)
# Build the "Ano_mes" key (e.g. "2014_01") from the duplicated join columns;
# the ..6/..8 suffixes come from ANO/MES being repeated by the SQL join.
Total_Dataset_Freq_M <- unite(Total_Dataset_Freq_M, "Ano_mes", c("ANO..6", "MES..8"))
# Persist the assembled modelling dataset.
save(Total_Dataset_Freq_M, file = "../Modelos/Total_Dataset_Freq_M_mensual.Rda")
Se ajustarán modelos con la información disponible desde el 01 de enero de 2014 hasta el 31 de diciembre de 2017 y se utilizará el año 2018 para validar el modelo:
# Training set: every month whose year is not 2018 (2014-2017, 48 rows).
Train_M_Dataset <- subset(Total_Dataset_Freq_M, ANO!="2018")
summary(Train_M_Dataset $ANO)
## Length Class Mode
## 48 character character
Se ajustan otra vez los niveles del factor ANO
# ANO is still character here (see summary above); convert to a factor so
# only the observed year levels 2014-2017 remain.
Train_M_Dataset $ANO <- factor(Train_M_Dataset $ANO)
summary(Train_M_Dataset $ANO)
## 2014 2015 2016 2017
## 12 12 12 12
library(sqldf)
# Validation set: the 12 months of 2018. Fix: ANO is a character column, so
# compare against the string literal '2018' and use standard SQL "=" — the
# original used the non-standard "==" with an unquoted numeric literal,
# relying on SQLite's implicit type coercion.
Test_M_Dataset <- sqldf("SELECT *
FROM Total_Dataset_Freq_M
WHERE ANO = '2018'")
summary(Test_M_Dataset$ANO)
## Length Class Mode
## 12 character character
Se ajustan otra vez los niveles del factor ANO
# Re-level ANO in the validation set too; after conversion the only level
# present is 2018 (12 rows).
Test_M_Dataset$ANO <- factor(Test_M_Dataset$ANO)
summary(Test_M_Dataset$ANO)
## 2018
## 12
Se utilizará el método forward selection para elegir las mejores variables explicativas del modelo, teniendo como criterio aquellas variables que presenten el mejor R^2 ajustado
library (leaps)
# Forward stepwise selection over all candidate predictors. Fix: the formula
# used the Unicode similar-operator "∼" (U+223C), which is a parse error in
# R; replaced with the ASCII "~". Also use "<-" and a named data= argument.
# nvmax = 80 exceeds the number of available predictors, so leaps caps it
# (reduced to 21, per the warning echoed below).
regfit.fwd <- regsubsets(
  TOTAL_ACCIDENTES ~ Ano_Base + MES + Freq_lunes + Freq_martes +
    Freq_miercoles + Freq_jueves + Freq_viernes + Freq_sabado +
    Freq_domingo + Feriados + Feriados_Lunes + Feriados_Otro,
  data = Train_M_Dataset, method = "forward", nvmax = 80
)
## Warning in leaps.setup(x, y, wt = wt, nbest = nbest, nvmax = nvmax,
## force.in = force.in, : 1 linear dependencies found
## Warning in leaps.setup(x, y, wt = wt, nbest = nbest, nvmax = nvmax,
## force.in = force.in, : nvmax reduced to 21
## Warning in rval$lopt[] <- rval$vorder[rval$lopt]: número de items para para
## sustituir no es un múltiplo de la longitud del reemplazo
# Show which predictors enter the model at each subset size.
summary (regfit.fwd)
## Subset selection object
## Call: regsubsets.formula(TOTAL_ACCIDENTES ~ Ano_Base + MES + Freq_lunes +
## Freq_martes + Freq_miercoles + Freq_jueves + Freq_viernes +
## Freq_sabado + Freq_domingo + Feriados + Feriados_Lunes +
## Feriados_Otro, Train_M_Dataset, method = "forward", nvmax = 80)
## 22 Variables (and intercept)
## Forced in Forced out
## Ano_Base FALSE FALSE
## MES02 FALSE FALSE
## MES03 FALSE FALSE
## MES04 FALSE FALSE
## MES05 FALSE FALSE
## MES06 FALSE FALSE
## MES07 FALSE FALSE
## MES08 FALSE FALSE
## MES09 FALSE FALSE
## MES10 FALSE FALSE
## MES11 FALSE FALSE
## MES12 FALSE FALSE
## Freq_lunes FALSE FALSE
## Freq_martes FALSE FALSE
## Freq_miercoles FALSE FALSE
## Freq_jueves FALSE FALSE
## Freq_viernes FALSE FALSE
## Freq_sabado FALSE FALSE
## Freq_domingo FALSE FALSE
## Feriados FALSE FALSE
## Feriados_Lunes FALSE FALSE
## Feriados_Otro FALSE FALSE
## 1 subsets of each size up to 21
## Selection Algorithm: forward
## Ano_Base MES02 MES03 MES04 MES05 MES06 MES07 MES08 MES09 MES10
## 1 ( 1 ) " " " " " " " " " " " " " " "*" " " " "
## 2 ( 1 ) " " " " " " " " " " " " " " "*" " " " "
## 3 ( 1 ) " " "*" " " " " " " " " " " "*" " " " "
## 4 ( 1 ) " " "*" " " " " "*" " " " " "*" " " " "
## 5 ( 1 ) " " "*" " " " " "*" " " " " "*" " " " "
## 6 ( 1 ) " " "*" " " " " "*" " " "*" "*" " " " "
## 7 ( 1 ) " " "*" "*" " " "*" " " "*" "*" " " " "
## 8 ( 1 ) " " "*" "*" " " "*" " " "*" "*" " " " "
## 9 ( 1 ) " " "*" "*" " " "*" " " "*" "*" " " "*"
## 10 ( 1 ) "*" "*" "*" " " "*" " " "*" "*" " " "*"
## 11 ( 1 ) "*" "*" "*" " " "*" " " "*" "*" "*" "*"
## 12 ( 1 ) "*" "*" "*" "*" "*" " " "*" "*" "*" "*"
## 13 ( 1 ) "*" "*" "*" "*" "*" " " "*" "*" "*" "*"
## 14 ( 1 ) "*" "*" "*" "*" "*" " " "*" "*" "*" "*"
## 15 ( 1 ) "*" "*" "*" "*" "*" "*" "*" "*" "*" "*"
## 16 ( 1 ) "*" "*" "*" "*" "*" "*" "*" "*" "*" "*"
## 17 ( 1 ) "*" "*" "*" "*" "*" "*" "*" "*" "*" "*"
## 18 ( 1 ) "*" "*" "*" "*" "*" "*" "*" "*" "*" "*"
## 19 ( 1 ) "*" "*" "*" "*" "*" "*" "*" "*" "*" "*"
## 20 ( 1 ) "*" "*" "*" "*" "*" "*" "*" "*" "*" "*"
## 21 ( 1 ) "*" "*" "*" "*" "*" "*" "*" "*" "*" "*"
## MES11 MES12 Freq_lunes Freq_martes Freq_miercoles Freq_jueves
## 1 ( 1 ) " " " " " " " " " " " "
## 2 ( 1 ) " " " " " " " " " " " "
## 3 ( 1 ) " " " " " " " " " " " "
## 4 ( 1 ) " " " " " " " " " " " "
## 5 ( 1 ) " " " " " " " " " " " "
## 6 ( 1 ) " " " " " " " " " " " "
## 7 ( 1 ) " " " " " " " " " " " "
## 8 ( 1 ) " " "*" " " " " " " " "
## 9 ( 1 ) " " "*" " " " " " " " "
## 10 ( 1 ) " " "*" " " " " " " " "
## 11 ( 1 ) " " "*" " " " " " " " "
## 12 ( 1 ) " " "*" " " " " " " " "
## 13 ( 1 ) " " "*" " " " " " " " "
## 14 ( 1 ) "*" "*" " " " " " " " "
## 15 ( 1 ) "*" "*" " " " " " " " "
## 16 ( 1 ) "*" "*" " " " " " " " "
## 17 ( 1 ) "*" "*" " " " " " " " "
## 18 ( 1 ) "*" "*" " " " " "*" " "
## 19 ( 1 ) "*" "*" "*" " " "*" " "
## 20 ( 1 ) "*" "*" "*" " " "*" "*"
## 21 ( 1 ) "*" "*" "*" "*" "*" "*"
## Freq_viernes Freq_sabado Freq_domingo Feriados Feriados_Lunes
## 1 ( 1 ) " " " " " " " " " "
## 2 ( 1 ) " " " " " " "*" " "
## 3 ( 1 ) " " " " " " "*" " "
## 4 ( 1 ) " " " " " " "*" " "
## 5 ( 1 ) "*" " " " " "*" " "
## 6 ( 1 ) "*" " " " " "*" " "
## 7 ( 1 ) "*" " " " " "*" " "
## 8 ( 1 ) "*" " " " " "*" " "
## 9 ( 1 ) "*" " " " " "*" " "
## 10 ( 1 ) "*" " " " " "*" " "
## 11 ( 1 ) "*" " " " " "*" " "
## 12 ( 1 ) "*" " " " " "*" " "
## 13 ( 1 ) "*" " " " " "*" " "
## 14 ( 1 ) "*" " " " " "*" " "
## 15 ( 1 ) "*" " " " " "*" " "
## 16 ( 1 ) "*" "*" " " "*" " "
## 17 ( 1 ) "*" "*" "*" "*" " "
## 18 ( 1 ) "*" "*" "*" "*" " "
## 19 ( 1 ) "*" "*" "*" "*" " "
## 20 ( 1 ) "*" "*" "*" "*" " "
## 21 ( 1 ) "*" "*" "*" "*" " "
## Feriados_Otro
## 1 ( 1 ) " "
## 2 ( 1 ) " "
## 3 ( 1 ) " "
## 4 ( 1 ) " "
## 5 ( 1 ) " "
## 6 ( 1 ) " "
## 7 ( 1 ) " "
## 8 ( 1 ) " "
## 9 ( 1 ) " "
## 10 ( 1 ) " "
## 11 ( 1 ) " "
## 12 ( 1 ) " "
## 13 ( 1 ) "*"
## 14 ( 1 ) "*"
## 15 ( 1 ) "*"
## 16 ( 1 ) "*"
## 17 ( 1 ) "*"
## 18 ( 1 ) "*"
## 19 ( 1 ) "*"
## 20 ( 1 ) "*"
## 21 ( 1 ) "*"
# Keep the selection summary; its vectors (rsq, adjr2, rss, ...) drive the
# model-size choice below.
reg.summary =summary(regfit.fwd)
names(reg.summary)
## [1] "which" "rsq" "rss" "adjr2" "cp" "bic" "outmat" "obj"
# In-sample R^2 for each subset size (non-decreasing with size).
reg.summary$rsq
## [1] 0.1195680 0.2016062 0.3168790 0.4057561 0.4621648 0.5081664 0.5629993
## [8] 0.6187559 0.6594781 0.6963405 0.7262180 0.7430882 0.8081586 0.8238334
## [15] 0.8695697 0.8710930 0.8758966 0.8767008 0.8778598 0.8780811 0.8812346
Selección de variables con el mejor R^2 ajustado
# Subset size that maximizes adjusted R^2 (15 predictors, per output below).
max_adjr<-which.max (reg.summary$adjr2)
max_adjr
## [1] 15
# 2x2 plotting grid: RSS and adjusted R^2 vs number of variables, plus the
# regsubsets membership plot. NOTE(review): par() is changed globally here
# and never restored afterwards.
par(mfrow =c(2,2))
plot(reg.summary$rss ,xlab=" Número de Variables ",ylab=" RSS",
type="l")
plot(reg.summary$adjr2 ,xlab =" Número de Variables ",
ylab=" Adjusted RSq",type="l")
# Highlight the adjusted-R^2 optimum found above.
points (max_adjr, reg.summary$adjr2[max_adjr], col ="red",cex =2, pch =20)
plot(regfit.fwd ,scale ="adjr2")
Variables seleccionadas
# Coefficients of the best subset of size max_adjr.
coef(regfit.fwd ,max_adjr)
## (Intercept) Ano_Base MES02 MES03 MES04
## 3183.96697 74.76902 167.14852 557.22647 395.02177
## MES05 MES06 MES07 MES08 MES09
## 619.98671 283.32424 558.98976 708.79724 512.27746
## MES10 MES11 MES12 Freq_viernes Feriados
## 493.77135 308.38629 509.45818 -55.75788 -53.99389
## Feriados_Otro
## -45.29530
set.seed(123) # fix the RNG seed so the CV resampling is reproducible
Se realiza el modelo de regresión lineal con las variables seleccionadas y se revisa el p-valor de cada una para seleccionar las variables definitivas del modelo
library(caret)
##
## Attaching package: 'caret'
## The following object is masked from 'package:purrr':
##
## lift
# 10-fold cross-validation control reused by the caret fits below.
trcntrl = trainControl(method="cv", number=10)
# Linear model on the selected predictors (centered/scaled). Fix: the
# Unicode "∼" (U+223C) in the formula is replaced with ASCII "~" so the
# code actually parses.
caret_lm_fit_m = caret::train(TOTAL_ACCIDENTES ~ Ano_Base+MES+Feriados, data=Train_M_Dataset ,
method = "lm", trControl = trcntrl,
preProcess=c("center", "scale"),
tuneLength = 10)
summary(caret_lm_fit_m)
##
## Call:
## lm(formula = .outcome ~ ., data = dat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -166.88 -53.87 -8.75 67.13 199.55
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3522.46 14.39 244.752 < 2e-16 ***
## Ano_Base 37.76 14.56 2.594 0.0139 *
## MES02 65.64 25.54 2.570 0.0147 *
## MES03 168.41 20.11 8.375 8.89e-10 ***
## MES04 110.45 20.11 5.493 3.93e-06 ***
## MES05 180.22 19.69 9.152 1.07e-10 ***
## MES06 100.25 19.80 5.064 1.42e-05 ***
## MES07 161.84 20.11 8.049 2.21e-09 ***
## MES08 208.92 19.69 10.609 2.51e-12 ***
## MES09 154.25 25.54 6.039 7.66e-07 ***
## MES10 151.77 21.31 7.123 3.12e-08 ***
## MES11 110.47 19.69 5.609 2.77e-06 ***
## MES12 139.86 19.69 7.102 3.32e-08 ***
## Feriados -57.20 26.17 -2.185 0.0358 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 99.71 on 34 degrees of freedom
## Multiple R-squared: 0.8539, Adjusted R-squared: 0.798
## F-statistic: 15.29 on 13 and 34 DF, p-value: 1.458e-10
# Quick look at the training-data structure (columns echoed below).
head(Train_M_Dataset )
## ANO MES ACCIDENTES_GRAVES ACCIDENTES_LEVES TOTAL_ACCIDENTES Ano_mes
## 1 2014 01 1619 1372 2991 2014_01
## 2 2014 02 1776 1482 3258 2014_02
## 3 2014 03 2152 1703 3855 2014_03
## 4 2014 04 1930 1495 3425 2014_04
## 5 2014 05 2037 1649 3686 2014_05
## 6 2014 06 1977 1427 3404 2014_06
## Ano_Base Freq_lunes Freq_martes Freq_miercoles Freq_jueves Freq_viernes
## 1 0 0 0 1 1 1
## 2 0 0 0 0 0 0
## 3 0 1 0 0 0 0
## 4 0 0 1 1 0 0
## 5 0 0 0 0 1 1
## 6 0 1 0 0 0 0
## Freq_sabado Freq_domingo Feriados Feriados_Lunes Feriados_Otro
## 1 0 0 2 1 1
## 2 0 0 0 0 0
## 3 1 1 1 1 0
## 4 0 0 2 0 2
## 5 1 0 1 0 1
## 6 0 1 3 3 0
# NOTE(review): this chunk repeats the exact same fit as the previous one
# (identical formula, data and control) and overwrites caret_lm_fit_m; it is
# redundant and kept only because the rendered output depends on it.
# Fix: Unicode "∼" (U+223C) replaced with ASCII "~".
library(caret)
trcntrl = trainControl(method="cv", number=10)
caret_lm_fit_m = caret::train(TOTAL_ACCIDENTES ~ Ano_Base+MES+Feriados, data=Train_M_Dataset ,
method = "lm", trControl = trcntrl,
preProcess=c("center", "scale"),
tuneLength = 10)
summary(caret_lm_fit_m)
##
## Call:
## lm(formula = .outcome ~ ., data = dat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -166.88 -53.87 -8.75 67.13 199.55
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3522.46 14.39 244.752 < 2e-16 ***
## Ano_Base 37.76 14.56 2.594 0.0139 *
## MES02 65.64 25.54 2.570 0.0147 *
## MES03 168.41 20.11 8.375 8.89e-10 ***
## MES04 110.45 20.11 5.493 3.93e-06 ***
## MES05 180.22 19.69 9.152 1.07e-10 ***
## MES06 100.25 19.80 5.064 1.42e-05 ***
## MES07 161.84 20.11 8.049 2.21e-09 ***
## MES08 208.92 19.69 10.609 2.51e-12 ***
## MES09 154.25 25.54 6.039 7.66e-07 ***
## MES10 151.77 21.31 7.123 3.12e-08 ***
## MES11 110.47 19.69 5.609 2.77e-06 ***
## MES12 139.86 19.69 7.102 3.32e-08 ***
## Feriados -57.20 26.17 -2.185 0.0358 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 99.71 on 34 degrees of freedom
## Multiple R-squared: 0.8539, Adjusted R-squared: 0.798
## F-statistic: 15.29 on 13 and 34 DF, p-value: 1.458e-10
# Cross-validated resampling summary (RMSE / R^2 / MAE) for the linear model.
caret_lm_fit_m
## Linear Regression
##
## 48 samples
## 3 predictor
##
## Pre-processing: centered (13), scaled (13)
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 44, 44, 43, 43, 43, 42, ...
## Resampling results:
##
## RMSE Rsquared MAE
## 125.2733 0.6523645 102.8469
##
## Tuning parameter 'intercept' was held constant at a value of TRUE
Calculo MSE y RMSE para los datos de entrenamiento
# In-sample (training) predictions for the total-accidents linear model.
y_tr_pred_lm_m<-predict(caret_lm_fit_m,Train_M_Dataset [,c("Ano_Base","MES","Feriados")])
mse_tr_lm_m<-mean((Train_M_Dataset $TOTAL_ACCIDENTES-y_tr_pred_lm_m)^2) # training MSE
RMSE_tr_lm_m = sqrt(mse_tr_lm_m)
mse_tr_lm_m
## [1] 7042.377
RMSE_tr_lm_m
## [1] 83.91887
Calculo MSE y RMSE para los datos de validación
# Out-of-sample (2018 validation) predictions for the linear model.
y_test_pred_lm_m<-predict(caret_lm_fit_m,Test_M_Dataset[,c("Ano_Base","MES","Feriados")])
mse_test_lm_m<-mean((Test_M_Dataset$TOTAL_ACCIDENTES-y_test_pred_lm_m)^2) # test (validation) MSE — the original comment wrongly said "training"
RMSE_test_lm_m = sqrt(mse_test_lm_m)
mse_test_lm_m
## [1] 42167.71
RMSE_test_lm_m
## [1] 205.3478
Predicción en la muestra
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:reshape':
##
## rename
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# Actual vs fitted monthly totals on the training period (plotly line chart).
plot_ly (data=Train_M_Dataset ,
x = ~Ano_mes,
y = ~TOTAL_ACCIDENTES,
type = "scatter" ,mode = "lines",
name='Real',
line=list(width=1,color='rgb(205, 12, 24)'))%>%
add_trace(y= ~y_tr_pred_lm_m,
name='Modelo lm',
line=list(width=1,color='rgb(22, 96, 167)'))%>%
layout(title='Total accidentes',
xaxis=list(title="Fecha"),
yaxis=list(title="Accidentes"),
legend = list(x = 1, y = 0.4))
Gráfica serie 2018
# Actual vs predicted totals for the 2018 validation year.
plot_ly (data=Test_M_Dataset,
x = ~Ano_mes,
y = ~TOTAL_ACCIDENTES,
type = "scatter" ,mode = "lines",
name='Real',
line=list(width=1,color='rgb(205, 12, 24)'))%>%
add_trace(y= ~y_test_pred_lm_m,
name='Modelo lm',
line=list(width=1,color='rgb(22, 96, 167)'))%>%
layout(title='Total accidentes',
xaxis=list(title="Fecha"),
yaxis=list(title="Accidentes"),
legend = list(x = 1, y = 0.4))
trcntrl = trainControl(method="cv", number=10)
# Linear model for serious accidents only (MES + Feriados + Freq_domingo).
# Fix: Unicode "∼" (U+223C) replaced with ASCII "~".
caret_lm_fit_m_m = caret::train(ACCIDENTES_GRAVES ~ MES+Feriados+Freq_domingo, data=Train_M_Dataset ,
method = "lm", trControl = trcntrl,
preProcess=c("center", "scale"),
tuneLength = 10)
summary(caret_lm_fit_m_m)
##
## Call:
## lm(formula = .outcome ~ ., data = dat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -131.750 -48.669 2.574 37.767 138.250
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1962.02 11.31 173.434 < 2e-16 ***
## MES02 32.55 20.09 1.620 0.114429
## MES03 89.88 15.82 5.681 2.24e-06 ***
## MES04 49.54 15.85 3.126 0.003616 **
## MES05 88.47 15.48 5.715 2.02e-06 ***
## MES06 48.79 15.72 3.105 0.003825 **
## MES07 79.47 15.82 5.023 1.60e-05 ***
## MES08 115.28 15.48 7.448 1.22e-08 ***
## MES09 79.26 20.09 3.946 0.000378 ***
## MES10 63.49 16.80 3.778 0.000609 ***
## MES11 44.20 15.48 2.855 0.007275 **
## MES12 43.59 15.59 2.797 0.008438 **
## Feriados -30.96 21.05 -1.471 0.150498
## Freq_domingo 3.54 12.76 0.278 0.783058
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 78.38 on 34 degrees of freedom
## Multiple R-squared: 0.739, Adjusted R-squared: 0.6392
## F-statistic: 7.405 on 13 and 34 DF, p-value: 1.342e-06
# Cross-validated resampling summary for the serious-accidents linear model.
caret_lm_fit_m_m
## Linear Regression
##
## 48 samples
## 3 predictor
##
## Pre-processing: centered (13), scaled (13)
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 43, 43, 43, 45, 43, 43, ...
## Resampling results:
##
## RMSE Rsquared MAE
## 89.32155 0.5245695 74.06608
##
## Tuning parameter 'intercept' was held constant at a value of TRUE
Calculo MSE y RMSE para los datos de entrenamiento
# In-sample predictions and training error for the serious-accidents model.
y_tr_pred_lm_m_m<-predict(caret_lm_fit_m_m,Train_M_Dataset [,c("Freq_domingo","MES","Feriados")])
mse_tr_lm_m_m<-mean((Train_M_Dataset $ACCIDENTES_GRAVES-y_tr_pred_lm_m_m)^2) # training MSE
RMSE_tr_lm_m_m = sqrt(mse_tr_lm_m_m)
mse_tr_lm_m_m
## [1] 4351.268
RMSE_tr_lm_m_m
## [1] 65.96414
Calculo MSE y RMSE para los datos de validación
# 2018 validation predictions and error.
y_test_pred_lm_m_m<-predict(caret_lm_fit_m_m,Test_M_Dataset[,c("Freq_domingo","MES","Feriados")])
mse_test_lm_m_m<-mean((Test_M_Dataset$ACCIDENTES_GRAVES-y_test_pred_lm_m_m)^2) # test (validation) MSE — original comment wrongly said "training"
RMSE_test_lm_m_m = sqrt(mse_test_lm_m_m)
mse_test_lm_m_m
## [1] 40538.09
RMSE_test_lm_m_m
## [1] 201.3407
Predicción en la muestra
# Actual vs fitted serious accidents on the training period.
plot_ly (data=Train_M_Dataset ,
x = ~Ano_mes,
y = ~ACCIDENTES_GRAVES,
type = "scatter" ,mode = "lines",
name='Real',
line=list(width=1,color='rgb(205, 12, 24)'))%>%
add_trace(y= ~y_tr_pred_lm_m_m,
name='Modelo lm',
line=list(width=1,color='rgb(22, 96, 167)'))%>%
layout(title='Total accidentes graves',
xaxis=list(title="Fecha"),
yaxis=list(title="Accidentes graves"),
legend = list(x = 1, y = 0.4))
Gráfica serie 2018
# Actual vs predicted serious accidents for 2018. NOTE(review): split = ~ANO
# is a no-op here since the test set contains only year 2018.
plot_ly (data=Test_M_Dataset,
x = ~Ano_mes,
y = ~ACCIDENTES_GRAVES,
type = "scatter" ,mode = "lines",
split = ~ANO,
name='Real',
line=list(width=1,color='rgb(205, 12, 24)'))%>%
add_trace(y= ~y_test_pred_lm_m_m,
name='Modelo lm',
line=list(width=1,color='rgb(22, 96, 167)'))%>%
layout(title='Total accidentes graves',
xaxis=list(title="Fecha"),
yaxis=list(title="Accidentes graves"),
legend = list(x = 1, y = 0.4))
trcntrl = trainControl(method="cv", number=10)
# Linear model for minor accidents (Ano_Base + MES + Feriados).
# Fix: Unicode "∼" (U+223C) replaced with ASCII "~".
caret_lm_fit_m_sd = caret::train(ACCIDENTES_LEVES ~ Ano_Base+MES+Feriados, data=Train_M_Dataset ,
method = "lm", trControl = trcntrl,
preProcess=c("center", "scale"),
tuneLength = 10)
summary(caret_lm_fit_m_sd)
##
## Call:
## lm(formula = .outcome ~ ., data = dat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -123.098 -41.439 3.148 38.715 125.223
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1560.44 10.22 152.709 < 2e-16 ***
## Ano_Base 27.26 10.34 2.637 0.012509 *
## MES02 33.82 18.13 1.865 0.070796 .
## MES03 78.46 14.28 5.495 3.90e-06 ***
## MES04 61.35 14.28 4.297 0.000137 ***
## MES05 91.75 13.98 6.562 1.62e-07 ***
## MES06 52.00 14.06 3.699 0.000759 ***
## MES07 82.30 14.28 5.764 1.74e-06 ***
## MES08 93.64 13.98 6.697 1.09e-07 ***
## MES09 75.72 18.13 4.176 0.000195 ***
## MES10 88.14 15.13 5.826 1.45e-06 ***
## MES11 66.27 13.98 4.739 3.73e-05 ***
## MES12 96.78 13.98 6.922 5.62e-08 ***
## Feriados -26.70 18.58 -1.437 0.159925
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 70.8 on 34 degrees of freedom
## Multiple R-squared: 0.7479, Adjusted R-squared: 0.6516
## F-statistic: 7.761 on 13 and 34 DF, p-value: 7.884e-07
# Cross-validated resampling summary for the minor-accidents linear model.
caret_lm_fit_m_sd
## Linear Regression
##
## 48 samples
## 3 predictor
##
## Pre-processing: centered (13), scaled (13)
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 43, 43, 43, 42, 44, 43, ...
## Resampling results:
##
## RMSE Rsquared MAE
## 79.92116 0.5284022 69.41677
##
## Tuning parameter 'intercept' was held constant at a value of TRUE
Calculo MSE y RMSE para los datos de entrenamiento
# In-sample predictions and training error for the minor-accidents model.
y_tr_pred_lm_m_sd<-predict(caret_lm_fit_m_sd,Train_M_Dataset [,c("Ano_Base","MES","Feriados")])
mse_tr_lm_m_sd<-mean((Train_M_Dataset $ACCIDENTES_LEVES-y_tr_pred_lm_m_sd)^2) # training MSE
RMSE_tr_lm_m_sd = sqrt(mse_tr_lm_m_sd)
mse_tr_lm_m_sd
## [1] 3550.129
RMSE_tr_lm_m_sd
## [1] 59.58296
Calculo MSE y RMSE para los datos de validación
# 2018 validation predictions and error.
y_test_pred_lm_m_sd<-predict(caret_lm_fit_m_sd,Test_M_Dataset[,c("Ano_Base","MES","Feriados")])
mse_test_lm_m_sd<-mean((Test_M_Dataset$ACCIDENTES_LEVES-y_test_pred_lm_m_sd)^2) # test (validation) MSE — original comment wrongly said "training"
RMSE_test_lm_m_sd = sqrt(mse_test_lm_m_sd)
mse_test_lm_m_sd
## [1] 4435.997
RMSE_test_lm_m_sd
## [1] 66.60328
Predicción en la muestra
# Actual vs fitted minor accidents on the training period.
plot_ly (data=Train_M_Dataset ,
x = ~Ano_mes,
y = ~ACCIDENTES_LEVES,
type = "scatter" ,mode = "lines",
name='Real',
line=list(width=1,color='rgb(205, 12, 24)'))%>%
add_trace(y= ~y_tr_pred_lm_m_sd,
name='Modelo lm',
line=list(width=1,color='rgb(22, 96, 167)'))%>%
layout(title='Total Accidentes leves',
xaxis=list(title="Fecha"),
yaxis=list(title="Accidentes leves"),
legend = list(x = 1, y = 0.4))
Gráfica serie 2018
# Actual vs predicted minor accidents for 2018.
plot_ly (data=Test_M_Dataset,
x = ~Ano_mes,
y = ~ACCIDENTES_LEVES,
type = "scatter" ,mode = "lines",
name='Real',
line=list(width=1,color='rgb(205, 12, 24)'))%>%
add_trace(y= ~y_test_pred_lm_m_sd,
name='Modelo lm',
line=list(width=1,color='rgb(22, 96, 167)'))%>%
layout(title='Total accidentes leves',
xaxis=list(title="Fecha"),
yaxis=list(title="Accidentes leves"),
legend = list(x = 1, y = 0.4))
# Summary table of linear-model errors per accident type.
Tipo_de_accidentes= c("Total Accidentes","Accidentes graves","Accidentes leves")
RMSE_Train_lm = round(c(RMSE_tr_lm_m,RMSE_tr_lm_m_m,RMSE_tr_lm_m_sd), 3)
RMSE_Test_lm = round(c(RMSE_test_lm_m,RMSE_test_lm_m_m,RMSE_test_lm_m_sd),3)
# Fix: data.frame(cbind(...)) first builds a character matrix, silently
# coercing the numeric RMSE columns to character; build the data.frame
# directly so the RMSE columns remain numeric.
Tabla_lm = data.frame(Tipo_de_accidentes, RMSE_Train_lm, RMSE_Test_lm)
Tabla_lm
## Tipo_de_accidentes RMSE_Train_lm RMSE_Test_lm
## 1 Total Accidentes 83.919 205.348
## 2 Accidentes graves 65.964 201.341
## 3 Accidentes leves 59.583 66.603
# Re-inspect the training data before fitting the k-NN models.
head(Train_M_Dataset )
## ANO MES ACCIDENTES_GRAVES ACCIDENTES_LEVES TOTAL_ACCIDENTES Ano_mes
## 1 2014 01 1619 1372 2991 2014_01
## 2 2014 02 1776 1482 3258 2014_02
## 3 2014 03 2152 1703 3855 2014_03
## 4 2014 04 1930 1495 3425 2014_04
## 5 2014 05 2037 1649 3686 2014_05
## 6 2014 06 1977 1427 3404 2014_06
## Ano_Base Freq_lunes Freq_martes Freq_miercoles Freq_jueves Freq_viernes
## 1 0 0 0 1 1 1
## 2 0 0 0 0 0 0
## 3 0 1 0 0 0 0
## 4 0 0 1 1 0 0
## 5 0 0 0 0 1 1
## 6 0 1 0 0 0 0
## Freq_sabado Freq_domingo Feriados Feriados_Lunes Feriados_Otro
## 1 0 0 2 1 1
## 2 0 0 0 0 0
## 3 1 1 1 1 0
## 4 0 0 2 0 2
## 5 1 0 1 0 1
## 6 0 1 3 3 0
library(caret)
trcntrl = trainControl(method="cv", number=10)
# k-NN regression for total accidents on the same predictors; tuneLength=10
# tries 10 odd values of k. Fix: Unicode "∼" (U+223C) replaced with ASCII "~".
caret_knn_fit_m = caret::train(TOTAL_ACCIDENTES ~ Ano_Base+MES+Feriados, data=Train_M_Dataset ,
method = "knn", trControl = trcntrl,
preProcess=c("center", "scale"),
tuneLength = 10)
summary(caret_knn_fit_m)
## Length Class Mode
## learn 2 -none- list
## k 1 -none- numeric
## theDots 0 -none- list
## xNames 13 -none- character
## problemType 1 -none- character
## tuneValue 1 data.frame list
## obsLevels 1 -none- logical
## param 0 -none- list
# CV results across the tuned k values; smallest RMSE picks the final k.
caret_knn_fit_m
## k-Nearest Neighbors
##
## 48 samples
## 3 predictor
##
## Pre-processing: centered (13), scaled (13)
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 43, 43, 43, 44, 44, 44, ...
## Resampling results across tuning parameters:
##
## k RMSE Rsquared MAE
## 5 286.9993 0.4149184 258.0058
## 7 293.9042 0.4854303 263.4552
## 9 234.7895 0.3648568 196.9260
## 11 230.5185 0.3211644 195.6398
## 13 214.3056 0.3830598 176.3921
## 15 215.5572 0.3325944 176.8265
## 17 218.2165 0.4297943 179.6711
## 19 220.7714 0.3912014 179.4098
## 21 217.7822 0.4217734 175.7535
## 23 217.2215 0.4266236 174.1418
##
## RMSE was used to select the optimal model using the smallest value.
## The final value used for the model was k = 13.
Calculo MSE y RMSE para los datos de entrenamiento
# In-sample predictions and training error for the k-NN total model.
y_tr_pred_knn_m<-predict(caret_knn_fit_m,Train_M_Dataset [,c("Ano_Base","MES","Feriados")])
mse_tr_knn_m<-mean((Train_M_Dataset $TOTAL_ACCIDENTES-y_tr_pred_knn_m)^2) # training MSE
RMSE_tr_knn_m = sqrt(mse_tr_knn_m)
mse_tr_knn_m
## [1] 46788.17
RMSE_tr_knn_m
## [1] 216.3057
Calculo MSE y RMSE para los datos de validación
# 2018 validation predictions and error.
y_test_pred_knn_m<-predict(caret_knn_fit_m,Test_M_Dataset[,c("Ano_Base","MES","Feriados")])
mse_test_knn_m<-mean((Test_M_Dataset$TOTAL_ACCIDENTES-y_test_pred_knn_m)^2) # test (validation) MSE — original comment wrongly said "training"
RMSE_test_knn_m = sqrt(mse_test_knn_m)
mse_test_knn_m
## [1] 31547.02
RMSE_test_knn_m
## [1] 177.6148
Predicción en la muestra
# Actual vs fitted totals (k-NN) on the training period.
plot_ly (data=Train_M_Dataset ,
x = ~Ano_mes,
y = ~TOTAL_ACCIDENTES,
type = "scatter" ,mode = "lines",
name='Real',
line=list(width=1,color='rgb(205, 12, 24)'))%>%
add_trace(y= ~y_tr_pred_knn_m,
name='Modelo knn',
line=list(width=1,color='rgb(22, 96, 167)'))%>%
layout(title='Total Accidentes',
xaxis=list(title="Fecha"),
yaxis=list(title="Accidentes"),
legend = list(x = 1, y = 0.4))
Gráfica serie 2018
# Actual vs predicted totals (k-NN) for 2018.
plot_ly (data=Test_M_Dataset,
x = ~Ano_mes,
y = ~TOTAL_ACCIDENTES,
type = "scatter" ,mode = "lines",
name='Real',
line=list(width=1,color='rgb(205, 12, 24)'))%>%
add_trace(y= ~y_test_pred_knn_m,
name='Modelo knn',
line=list(width=1,color='rgb(22, 96, 167)'))%>%
layout(title='Total Accidentes',
xaxis=list(title="Fecha"),
yaxis=list(title="Accidentes"),
legend = list(x = 1, y = 0.4))
trcntrl = trainControl(method="cv", number=10)
# k-NN regression for serious accidents. Fix: Unicode "∼" -> ASCII "~".
caret_knn_fit_m_m = caret::train(ACCIDENTES_GRAVES ~ Ano_Base+MES+Feriados, data=Train_M_Dataset ,
method = "knn", trControl = trcntrl,
preProcess=c("center", "scale"),
tuneLength = 10)
summary(caret_knn_fit_m_m)
## Length Class Mode
## learn 2 -none- list
## k 1 -none- numeric
## theDots 0 -none- list
## xNames 13 -none- character
## problemType 1 -none- character
## tuneValue 1 data.frame list
## obsLevels 1 -none- logical
## param 0 -none- list
# CV results across k for the serious-accidents k-NN model.
caret_knn_fit_m_m
## k-Nearest Neighbors
##
## 48 samples
## 3 predictor
##
## Pre-processing: centered (13), scaled (13)
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 42, 44, 44, 44, 43, 43, ...
## Resampling results across tuning parameters:
##
## k RMSE Rsquared MAE
## 5 151.7366 0.2637437 131.7257
## 7 154.0235 0.3431891 130.8144
## 9 133.1727 0.2492413 113.7180
## 11 130.2438 0.2492649 111.3584
## 13 126.0656 0.3005884 108.2311
## 15 123.8132 0.3114384 105.9967
## 17 124.8157 0.2806645 107.4023
## 19 125.4078 0.2615448 107.4120
## 21 125.8452 0.3329166 109.3247
## 23 125.4787 0.3010697 108.4377
##
## RMSE was used to select the optimal model using the smallest value.
## The final value used for the model was k = 15.
Calculo MSE y RMSE para los datos de entrenamiento
# In-sample predictions and training error for the serious-accidents k-NN model.
y_tr_pred_knn_m_m<-predict(caret_knn_fit_m_m,Train_M_Dataset [,c("Ano_Base","MES","Feriados")])
mse_tr_knn_m_m<-mean((Train_M_Dataset $ACCIDENTES_GRAVES-y_tr_pred_knn_m_m)^2) # training MSE
RMSE_tr_knn_m_m = sqrt(mse_tr_knn_m_m)
mse_tr_knn_m_m
## [1] 16520.6
RMSE_tr_knn_m_m
## [1] 128.5325
Calculo MSE y RMSE para los datos de validación
# 2018 validation predictions and error.
y_test_pred_knn_m_m<-predict(caret_knn_fit_m_m,Test_M_Dataset[,c("Ano_Base","MES","Feriados")])
mse_test_knn_m_m<-mean((Test_M_Dataset$ACCIDENTES_GRAVES-y_test_pred_knn_m_m)^2) # test (validation) MSE — original comment wrongly said "training"
RMSE_test_knn_m_m = sqrt(mse_test_knn_m_m)
mse_test_knn_m_m
## [1] 27160.38
RMSE_test_knn_m_m
## [1] 164.8041
Predicción en la muestra
# Actual vs fitted serious accidents (k-NN) on the training period.
plot_ly (data=Train_M_Dataset ,
x = ~Ano_mes,
y = ~ACCIDENTES_GRAVES,
type = "scatter" ,mode = "lines",
name='Real',
line=list(width=1,color='rgb(205, 12, 24)'))%>%
add_trace(y= ~y_tr_pred_knn_m_m,
name='Modelo knn',
line=list(width=1,color='rgb(22, 96, 167)'))%>%
layout(title='Total Accidentes graves',
xaxis=list(title="Fecha"),
yaxis=list(title="Accidentes graves"),
legend = list(x = 1, y = 0.4))
Gráfica serie 2018
# Actual vs predicted serious accidents (k-NN) for 2018.
plot_ly (data=Test_M_Dataset,
x = ~Ano_mes,
y = ~ACCIDENTES_GRAVES,
type = "scatter" ,mode = "lines",
name='Real',
line=list(width=1,color='rgb(205, 12, 24)'))%>%
add_trace(y= ~y_test_pred_knn_m_m,
name='Modelo knn',
line=list(width=1,color='rgb(22, 96, 167)'))%>%
layout(title='Total accidentes graves',
xaxis=list(title="Fecha"),
yaxis=list(title="Accidentes graves"),
legend = list(x = 1, y = 0.4))
trcntrl = trainControl(method="cv", number=10)
# k-NN regression for minor accidents. Fix: Unicode "∼" -> ASCII "~".
caret_knn_fit_m_sd = caret::train(ACCIDENTES_LEVES ~ Ano_Base+MES+Feriados, data=Train_M_Dataset ,
method = "knn", trControl = trcntrl,
preProcess=c("center", "scale"),
tuneLength = 10)
summary(caret_knn_fit_m_sd)
## Length Class Mode
## learn 2 -none- list
## k 1 -none- numeric
## theDots 0 -none- list
## xNames 13 -none- character
## problemType 1 -none- character
## tuneValue 1 data.frame list
## obsLevels 1 -none- logical
## param 0 -none- list
# CV results across k for the minor-accidents k-NN model.
caret_knn_fit_m_sd
## k-Nearest Neighbors
##
## 48 samples
## 3 predictor
##
## Pre-processing: centered (13), scaled (13)
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 45, 43, 44, 44, 42, 43, ...
## Resampling results across tuning parameters:
##
## k RMSE Rsquared MAE
## 5 165.6046 0.2967434 144.08699
## 7 159.1989 0.3370283 136.97538
## 9 127.1377 0.5456573 105.57562
## 11 120.4366 0.5795465 99.12592
## 13 118.7365 0.5605990 98.01849
## 15 114.8793 0.4136792 93.07322
## 17 114.2984 0.3199909 93.06156
## 19 113.8753 0.3135036 92.26790
## 21 114.2376 0.2793133 92.18137
## 23 114.6177 0.2133970 90.76607
##
## RMSE was used to select the optimal model using the smallest value.
## The final value used for the model was k = 19.
Calculo MSE y RMSE para los datos de entrenamiento
# In-sample predictions and training error for the minor-accidents k-NN model.
y_tr_pred_knn_m_sd<-predict(caret_knn_fit_m_sd,Train_M_Dataset [,c("Ano_Base","MES","Feriados")])
mse_tr_knn_m_sd<-mean((Train_M_Dataset $ACCIDENTES_LEVES-y_tr_pred_knn_m_sd)^2) # training MSE
RMSE_tr_knn_m_sd = sqrt(mse_tr_knn_m_sd)
mse_tr_knn_m_sd
## [1] 13114.59
RMSE_tr_knn_m_sd
## [1] 114.519
Calculo MSE y RMSE para los datos de validación
# 2018 validation predictions and error.
y_test_pred_knn_m_sd<-predict(caret_knn_fit_m_sd,Test_M_Dataset[,c("Ano_Base","MES","Feriados")])
mse_test_knn_m_sd<-mean((Test_M_Dataset$ACCIDENTES_LEVES-y_test_pred_knn_m_sd)^2) # test (validation) MSE — original comment wrongly said "training"
RMSE_test_knn_m_sd = sqrt(mse_test_knn_m_sd)
mse_test_knn_m_sd
## [1] 15803.92
RMSE_test_knn_m_sd
## [1] 125.7137
Predicción en la muestra
# Actual vs fitted minor accidents (k-NN) on the training period.
plot_ly (data=Train_M_Dataset ,
x = ~Ano_mes,
y = ~ACCIDENTES_LEVES,
type = "scatter" ,mode = "lines",
name='Real',
line=list(width=1,color='rgb(205, 12, 24)'))%>%
add_trace(y= ~y_tr_pred_knn_m_sd,
name='Modelo knn',
line=list(width=1,color='rgb(22, 96, 167)'))%>%
layout(title='Total Accidentes leves',
xaxis=list(title="Fecha"),
yaxis=list(title="Accidentes"),
legend = list(x = 1, y = 0.4))
Gráfica serie 2018
# Actual vs predicted minor accidents (k-NN) for 2018.
plot_ly (data=Test_M_Dataset,
x = ~Ano_mes,
y = ~ACCIDENTES_LEVES,
type = "scatter" ,mode = "lines",
name='Real',
line=list(width=1,color='rgb(205, 12, 24)'))%>%
add_trace(y= ~y_test_pred_knn_m_sd,
name='Modelo knn',
line=list(width=1,color='rgb(22, 96, 167)'))%>%
layout(title='Accidentes leves',
xaxis=list(title="Fecha"),
yaxis=list(title="Accidentes"),
legend = list(x = 1, y = 0.4))
# Summary table of k-NN errors per accident type.
Tipo_de_accidentes= c("Total Accidentes","Accidentes graves","Accidentes leves")
RMSE_Train_knn = round(c(RMSE_tr_knn_m,RMSE_tr_knn_m_m,RMSE_tr_knn_m_sd), 3)
RMSE_Test_knn = round(c(RMSE_test_knn_m,RMSE_test_knn_m_m,RMSE_test_knn_m_sd),3)
# Fix: data.frame(cbind(...)) coerces the numeric RMSE columns to character
# via an intermediate character matrix; build the data.frame directly.
Tabla_knn = data.frame(Tipo_de_accidentes, RMSE_Train_knn, RMSE_Test_knn)
Tabla_knn
## Tipo_de_accidentes RMSE_Train_knn RMSE_Test_knn
## 1 Total Accidentes 216.306 177.615
## 2 Accidentes graves 128.533 164.804
## 3 Accidentes leves 114.519 125.714
# Poisson GLM for monthly total accident counts.
# Fix: Unicode "∼" (U+223C) replaced with ASCII "~".
glm_fit_m<-glm(TOTAL_ACCIDENTES ~ Ano_Base+MES+Feriados, data=Train_M_Dataset , family = "poisson")
summary(glm_fit_m)
##
## Call:
## glm(formula = TOTAL_ACCIDENTES ~ Ano_Base + MES + Feriados, family = "poisson",
## data = Train_M_Dataset)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.7770 -0.9136 -0.1718 1.1553 3.2824
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 8.040802 0.013490 596.073 < 2e-16 ***
## Ano_Base 0.021498 0.004872 4.413 1.02e-05 ***
## MES02 0.076534 0.015886 4.818 1.45e-06 ***
## MES03 0.181253 0.012534 14.461 < 2e-16 ***
## MES04 0.122707 0.012707 9.656 < 2e-16 ***
## MES05 0.193291 0.012276 15.745 < 2e-16 ***
## MES06 0.111670 0.012573 8.882 < 2e-16 ***
## MES07 0.174897 0.012542 13.944 < 2e-16 ***
## MES08 0.220877 0.012200 18.104 < 2e-16 ***
## MES09 0.166032 0.015686 10.585 < 2e-16 ***
## MES10 0.164539 0.013244 12.423 < 2e-16 ***
## MES11 0.122828 0.012477 9.844 < 2e-16 ***
## MES12 0.153123 0.012389 12.359 < 2e-16 ***
## Feriados -0.018265 0.004898 -3.729 0.000192 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 667.240 on 47 degrees of freedom
## Residual deviance: 94.222 on 34 degrees of freedom
## AIC: 602.36
##
## Number of Fisher Scoring iterations: 3
# Compact print of the Poisson GLM (coefficients, deviance, AIC).
glm_fit_m
##
## Call: glm(formula = TOTAL_ACCIDENTES ~ Ano_Base + MES + Feriados, family = "poisson",
## data = Train_M_Dataset)
##
## Coefficients:
## (Intercept) Ano_Base MES02 MES03 MES04
## 8.04080 0.02150 0.07653 0.18125 0.12271
## MES05 MES06 MES07 MES08 MES09
## 0.19329 0.11167 0.17490 0.22088 0.16603
## MES10 MES11 MES12 Feriados
## 0.16454 0.12283 0.15312 -0.01826
##
## Degrees of Freedom: 47 Total (i.e. Null); 34 Residual
## Null Deviance: 667.2
## Residual Deviance: 94.22 AIC: 602.4
Calculo MSE y RMSE para los datos de entrenamiento
# In-sample predictions on the response (count) scale for the Poisson GLM.
y_tr_pred_glm_m<-predict(glm_fit_m,Train_M_Dataset [,c("Ano_Base","MES","Feriados")],type="response")
mse_tr_glm_m<-mean((Train_M_Dataset $TOTAL_ACCIDENTES-y_tr_pred_glm_m)^2) # training MSE
RMSE_tr_glm_m = sqrt(mse_tr_glm_m)
mse_tr_glm_m
## [1] 7022.906
RMSE_tr_glm_m
## [1] 83.80278
Calculo MSE y RMSE para los datos de validación
# Out-of-sample (2018) predictions on the response scale for the Poisson GLM.
y_test_pred_glm_m<-predict(glm_fit_m,Test_M_Dataset[,c("Ano_Base","MES","Feriados")],type="response")
# Fix: the test MSE was computed against Train_M_Dataset$TOTAL_ACCIDENTES —
# 48 training values silently recycled over the 12 predictions (48 is a
# multiple of 12, so R emits no warning). It must compare the 2018 test
# observations. NOTE(review): the echoed values below (12294.85 / 110.8821)
# were produced by the buggy formula and will change on re-render.
mse_test_glm_m<-mean((Test_M_Dataset$TOTAL_ACCIDENTES-y_test_pred_glm_m)^2) # test (validation) MSE
RMSE_test_glm_m = sqrt(mse_test_glm_m)
mse_test_glm_m
## [1] 12294.85
RMSE_test_glm_m
## [1] 110.8821
Predicción en la muestra
# Actual vs fitted totals (Poisson GLM) on the training period.
plot_ly (data=Train_M_Dataset ,
x = ~Ano_mes,
y = ~TOTAL_ACCIDENTES,
type = "scatter" ,mode = "lines",
name='Real',
line=list(width=1,color='rgb(205, 12, 24)'))%>%
add_trace(y= ~y_tr_pred_glm_m,
name='Modelo glm',
line=list(width=1,color='rgb(22, 96, 167)'))%>%
layout(title='Total accidentes',
xaxis=list(title="Fecha"),
yaxis=list(title="Accidentes"),
legend = list(x = 1, y = 0.4))
Gráfica serie 2018
# Actual vs predicted totals (Poisson GLM) for 2018.
plot_ly (data=Test_M_Dataset,
x = ~Ano_mes,
y = ~TOTAL_ACCIDENTES,
type = "scatter" ,mode = "lines",
name='Real',
line=list(width=1,color='rgb(205, 12, 24)'))%>%
add_trace(y= ~y_test_pred_glm_m,
name='Modelo glm',
line=list(width=1,color='rgb(22, 96, 167)'))%>%
layout(title='Total Accidentes',
xaxis=list(title="Fecha"),
yaxis=list(title="Accidentes"),
legend = list(x = 1, y = 0.4))
# Poisson GLM for serious accidents (ACCIDENTES_GRAVES) with a year trend,
# month dummies, and holiday count as predictors.
# FIX: the formula used the Unicode tilde operator "∼" (U+223C), which R's
# parser rejects; replaced with the ASCII "~".
glm_fit_m_m <- glm(ACCIDENTES_GRAVES ~ Ano_Base + MES + Feriados,
                   data = Train_M_Dataset, family = "poisson")
summary(glm_fit_m_m)
##
## Call:
## glm(formula = ACCIDENTES_GRAVES ~ Ano_Base + MES + Feriados,
## family = "poisson", data = Train_M_Dataset)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.82869 -1.29977 -0.06613 0.96614 2.88852
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 7.482694 0.017965 416.510 < 2e-16 ***
## Ano_Base 0.010862 0.006527 1.664 0.09612 .
## MES02 0.065540 0.021162 3.097 0.00195 **
## MES03 0.170614 0.016646 10.250 < 2e-16 ***
## MES04 0.097063 0.016929 5.734 9.83e-09 ***
## MES05 0.168463 0.016347 10.305 < 2e-16 ***
## MES06 0.095051 0.016717 5.686 1.30e-08 ***
## MES07 0.152485 0.016701 9.130 < 2e-16 ***
## MES08 0.214415 0.016178 13.253 < 2e-16 ***
## MES09 0.149577 0.020911 7.153 8.48e-13 ***
## MES10 0.123908 0.017709 6.997 2.61e-12 ***
## MES11 0.087723 0.016659 5.266 1.40e-07 ***
## MES12 0.085598 0.016667 5.136 2.81e-07 ***
## Feriados -0.017289 0.006556 -2.637 0.00836 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 411.53 on 47 degrees of freedom
## Residual deviance: 105.21 on 34 degrees of freedom
## AIC: 585.25
##
## Number of Fisher Scoring iterations: 3
glm_fit_m_m
##
## Call: glm(formula = ACCIDENTES_GRAVES ~ Ano_Base + MES + Feriados,
## family = "poisson", data = Train_M_Dataset)
##
## Coefficients:
## (Intercept) Ano_Base MES02 MES03 MES04
## 7.48269 0.01086 0.06554 0.17061 0.09706
## MES05 MES06 MES07 MES08 MES09
## 0.16846 0.09505 0.15249 0.21441 0.14958
## MES10 MES11 MES12 Feriados
## 0.12391 0.08772 0.08560 -0.01729
##
## Degrees of Freedom: 47 Total (i.e. Null); 34 Residual
## Null Deviance: 411.5
## Residual Deviance: 105.2 AIC: 585.2
Calculo MSE y RMSE para los datos de entrenamiento
# Training-set error for the serious-accident Poisson GLM.
# FIX: the original passed two extra columns ("Freq_viernes",
# "Feriados_Lunes") that are not in the model formula; predict() ignores
# them, but they are dropped here for consistency with every other
# predict() call in this script.
y_tr_pred_glm_m_m <- predict(glm_fit_m_m, Train_M_Dataset[, c("Ano_Base", "MES", "Feriados")], type = "response")
mse_tr_glm_m_m <- mean((Train_M_Dataset$ACCIDENTES_GRAVES - y_tr_pred_glm_m_m)^2)  # training MSE
RMSE_tr_glm_m_m <- sqrt(mse_tr_glm_m_m)
mse_tr_glm_m_m
## [1] 4256.688
RMSE_tr_glm_m_m
## [1] 65.2433
Calculo MSE y RMSE para los datos de validación
# Validation-set error for the serious-accident Poisson GLM.
y_test_pred_glm_m_m <- predict(glm_fit_m_m, Test_M_Dataset[, c("Ano_Base", "MES", "Feriados")], type = "response")
# BUG FIX: the validation MSE compared the TRAINING response against
# test-set predictions; it must use the test-set response.
mse_test_glm_m_m <- mean((Test_M_Dataset$ACCIDENTES_GRAVES - y_test_pred_glm_m_m)^2)  # validation MSE
RMSE_test_glm_m_m <- sqrt(mse_test_glm_m_m)
mse_test_glm_m_m
## [1] 5190.199  (stale output: printed value predates the Train->Test fix above)
RMSE_test_glm_m_m
## [1] 72.04304  (stale output: printed value predates the Train->Test fix above)
Predicción en la muestra
plot_ly (data=Train_M_Dataset ,
x = ~Ano_mes,
y = ~ACCIDENTES_GRAVES,
type = "scatter" ,mode = "lines",
name='Real',
line=list(width=1,color='rgb(205, 12, 24)'))%>%
add_trace(y= ~y_tr_pred_glm_m_m,
name='Modelo glm',
line=list(width=1,color='rgb(22, 96, 167)'))%>%
layout(title='Total accidentes graves',
xaxis=list(title="Fecha"),
yaxis=list(title="Accidentes graves"),
legend = list(x = 1, y = 0.4))
Gráfica serie 2018
plot_ly (data=Test_M_Dataset,
x = ~Ano_mes,
y = ~ACCIDENTES_GRAVES,
type = "scatter" ,mode = "lines",
name='Real',
line=list(width=1,color='rgb(205, 12, 24)'))%>%
add_trace(y= ~y_test_pred_glm_m_m,
name='Modelo glm',
line=list(width=1,color='rgb(22, 96, 167)'))%>%
layout(title='Total accidentes graves',
xaxis=list(title="Fecha"),
yaxis=list(title="Accidentes graves"),
legend = list(x = 1, y = 0.4))
# Poisson GLM for minor accidents (ACCIDENTES_LEVES) with a year trend,
# month dummies, and holiday count as predictors.
# FIX: the formula used the Unicode tilde operator "∼" (U+223C), which R's
# parser rejects; replaced with the ASCII "~".
glm_fit_m_sd <- glm(ACCIDENTES_LEVES ~ Ano_Base + MES + Feriados,
                    data = Train_M_Dataset, family = "poisson")
summary(glm_fit_m_sd)
##
## Call:
## glm(formula = ACCIDENTES_LEVES ~ Ano_Base + MES + Feriados, family = "poisson",
## data = Train_M_Dataset)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -3.13147 -1.07468 0.05817 0.94150 3.12436
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 7.191696 0.020429 352.039 < 2e-16 ***
## Ano_Base 0.034873 0.007320 4.764 1.90e-06 ***
## MES02 0.091020 0.024048 3.785 0.000154 ***
## MES03 0.195228 0.019047 10.250 < 2e-16 ***
## MES04 0.155753 0.019237 8.096 5.66e-16 ***
## MES05 0.225331 0.018595 12.118 < 2e-16 ***
## MES06 0.133295 0.019079 6.986 2.82e-12 ***
## MES07 0.203902 0.018998 10.733 < 2e-16 ***
## MES08 0.229392 0.018578 12.348 < 2e-16 ***
## MES09 0.187511 0.023723 7.904 2.69e-15 ***
## MES10 0.216001 0.019962 10.820 < 2e-16 ***
## MES11 0.167601 0.018838 8.897 < 2e-16 ***
## MES12 0.236247 0.018550 12.736 < 2e-16 ***
## Feriados -0.019486 0.007369 -2.644 0.008182 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 443.58 on 47 degrees of freedom
## Residual deviance: 109.38 on 34 degrees of freedom
## AIC: 578.38
##
## Number of Fisher Scoring iterations: 3
glm_fit_m_sd
##
## Call: glm(formula = ACCIDENTES_LEVES ~ Ano_Base + MES + Feriados, family = "poisson",
## data = Train_M_Dataset)
##
## Coefficients:
## (Intercept) Ano_Base MES02 MES03 MES04
## 7.19170 0.03487 0.09102 0.19523 0.15575
## MES05 MES06 MES07 MES08 MES09
## 0.22533 0.13330 0.20390 0.22939 0.18751
## MES10 MES11 MES12 Feriados
## 0.21600 0.16760 0.23625 -0.01949
##
## Degrees of Freedom: 47 Total (i.e. Null); 34 Residual
## Null Deviance: 443.6
## Residual Deviance: 109.4 AIC: 578.4
Calculo MSE y RMSE para los datos de entrenamiento
y_tr_pred_glm_m_sd<-predict(glm_fit_m_sd,Train_M_Dataset [,c("Ano_Base","MES","Feriados")],type="response")
mse_tr_glm_m_sd<-mean((Train_M_Dataset $ACCIDENTES_LEVES-y_tr_pred_glm_m_sd)^2) # calcula el mse de entrenamiento
RMSE_tr_glm_m_sd = sqrt(mse_tr_glm_m_sd)
mse_tr_glm_m_sd
## [1] 3517.603
RMSE_tr_glm_m_sd
## [1] 59.30938
Calculo MSE y RMSE para los datos de validación
# Validation-set error for the minor-accident Poisson GLM.
y_test_pred_glm_m_sd <- predict(glm_fit_m_sd, Test_M_Dataset[, c("Ano_Base", "MES", "Feriados")], type = "response")
# BUG FIX: the validation MSE compared the TRAINING response against
# test-set predictions; it must use the test-set response.
mse_test_glm_m_sd <- mean((Test_M_Dataset$ACCIDENTES_LEVES - y_test_pred_glm_m_sd)^2)  # validation MSE
RMSE_test_glm_m_sd <- sqrt(mse_test_glm_m_sd)
mse_test_glm_m_sd
## [1] 5528.337  (stale output: printed value predates the Train->Test fix above)
RMSE_test_glm_m_sd
## [1] 74.35279  (stale output: printed value predates the Train->Test fix above)
Predicción en la muestra
plot_ly (data=Train_M_Dataset ,
x = ~Ano_mes,
y = ~ACCIDENTES_LEVES,
type = "scatter" ,mode = "lines",
name='Real',
line=list(width=1,color='rgb(205, 12, 24)'))%>%
add_trace(y= ~y_tr_pred_glm_m_sd,
name='Modelo glm',
line=list(width=1,color='rgb(22, 96, 167)'))%>%
layout(title='Total Accidentes leves',
xaxis=list(title="Fecha"),
yaxis=list(title="Accidentes leves"),
legend = list(x = 1, y = 0.4))
Gráfica serie 2018
plot_ly (data=Test_M_Dataset,
x = ~Ano_mes,
y = ~ACCIDENTES_LEVES,
type = "scatter" ,mode = "lines",
name='Real',
line=list(width=1,color='rgb(205, 12, 24)'))%>%
add_trace(y= ~y_test_pred_glm_m_sd,
name='Modelo glm',
line=list(width=1,color='rgb(22, 96, 167)'))%>%
layout(title='Total Accidentes leves',
xaxis=list(title="Fecha"),
yaxis=list(title="Accidentes leves"),
legend = list(x = 1, y = 0.4))
#### Resumen de los modelos de regresión lineal generalizada para los diferentes tipos de accidente
# Summary table of training/validation RMSE for the three GLM models.
Tipo_de_accidentes <- c("Total Accidentes", "Accidentes Graves", "Accidentes Leves")
RMSE_Train_glm <- round(c(RMSE_tr_glm_m, RMSE_tr_glm_m_m, RMSE_tr_glm_m_sd), 3)
RMSE_Test_glm <- round(c(RMSE_test_glm_m, RMSE_test_glm_m_m, RMSE_test_glm_m_sd), 3)
# FIX: data.frame(cbind(...)) first builds a character matrix, silently
# coercing the numeric RMSE columns to character; passing the vectors
# directly to data.frame() preserves their types.
Tabla_glm <- data.frame(Tipo_de_accidentes, RMSE_Train_glm, RMSE_Test_glm)
Tabla_glm
## Tipo_de_accidentes RMSE_Train_glm RMSE_Test_glm
## 1 Total Accidentes 83.803 110.882
## 2 Accidentes Graves 65.243 72.043
## 3 Accidentes Leves 59.309 74.353
# 10-fold cross-validated CART regression tree for TOTAL_ACCIDENTES.
trcntrl <- trainControl(method = "cv", number = 10)
# FIX: the formula used the Unicode tilde "∼" (U+223C); replaced with ASCII "~".
# NOTE(review): parms = list(split = "gini") applies to classification
# trees; for a numeric outcome rpart ignores it — confirm it can be dropped.
caret_tree_fit_m <- caret::train(TOTAL_ACCIDENTES ~ Ano_Base + MES + Feriados,
                                 data = Train_M_Dataset,
                                 method = "rpart", trControl = trcntrl,
                                 parms = list(split = "gini"),
                                 preProcess = c("center", "scale"),
                                 tuneLength = 10)
## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info =
## trainInfo, : There were missing values in resampled performance measures.
caret_tree_fit_m
## CART
##
## 48 samples
## 3 predictor
##
## Pre-processing: centered (13), scaled (13)
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 44, 44, 44, 42, 44, 42, ...
## Resampling results across tuning parameters:
##
## cp RMSE Rsquared MAE
## 0.000000000 190.7647 0.4746258 154.7190
## 0.009395662 190.7647 0.4746258 154.7190
## 0.018791325 190.7647 0.4746258 154.7190
## 0.028186987 192.6841 0.4791335 156.6079
## 0.037582649 197.2557 0.4715011 160.7902
## 0.046978312 198.6513 0.4715011 162.8287
## 0.056373974 208.6457 0.4322710 171.6781
## 0.065769636 209.5126 0.4201176 172.2044
## 0.075165299 215.6028 0.4267704 174.0021
## 0.084560961 217.7171 0.3473503 177.6980
##
## RMSE was used to select the optimal model using the smallest value.
## The final value used for the model was cp = 0.01879132.
Calculo MSE y RMSE para los datos de entrenamiento
y_tr_pred_tree_m<-predict(caret_tree_fit_m,Train_M_Dataset [,c("Ano_Base","MES","Feriados")])
mse_tr_tree_m<-mean((Train_M_Dataset $TOTAL_ACCIDENTES-y_tr_pred_tree_m)^2) # calcula el mse de entrenamiento
RMSE_tr_tree_m = sqrt(mse_tr_tree_m)
mse_tr_tree_m
## [1] 41776.09
RMSE_tr_tree_m
## [1] 204.392
Calculo MSE y RMSE para los datos de validación
y_test_pred_tree_m<-predict(caret_tree_fit_m,Test_M_Dataset[,c("Ano_Base","MES","Feriados")])
mse_test_tree_m<-mean((Test_M_Dataset$TOTAL_ACCIDENTES-y_test_pred_tree_m)^2) # calcula el mse de entrenamiento
RMSE_test_tree_m = sqrt(mse_test_tree_m)
mse_test_tree_m
## [1] 61023.87
RMSE_test_tree_m
## [1] 247.0301
Predicción en la muestra
plot_ly (data=Train_M_Dataset ,
x = ~Ano_mes,
y = ~TOTAL_ACCIDENTES,
type = "scatter" ,mode = "lines",
name='Real',
line=list(width=1,color='rgb(205, 12, 24)'))%>%
add_trace(y= ~y_tr_pred_tree_m,
name='Modelo tree',
line=list(width=1,color='rgb(22, 96, 167)'))%>%
layout(title='Total accidentes',
xaxis=list(title="Fecha"),
yaxis=list(title="Accidentes"),
legend = list(x = 1, y = 0.4))
Gráfica serie 2018
plot_ly (data=Test_M_Dataset,
x = ~Ano_mes,
y = ~TOTAL_ACCIDENTES,
type = "scatter" ,mode = "lines",
name='Real',
line=list(width=1,color='rgb(205, 12, 24)'))%>%
add_trace(y= ~y_test_pred_tree_m,
name='Modelo tree',
line=list(width=1,color='rgb(22, 96, 167)'))%>%
layout(title='Total accidentes',
xaxis=list(title="Fecha"),
yaxis=list(title="Accidentes"),
legend = list(x = 1, y = 0.4))
# 10-fold cross-validated CART regression tree for ACCIDENTES_GRAVES.
trcntrl <- trainControl(method = "cv", number = 10)
# FIX: the formula used the Unicode tilde "∼" (U+223C); replaced with ASCII "~".
# NOTE(review): parms = list(split = "gini") applies to classification
# trees; for a numeric outcome rpart ignores it — confirm it can be dropped.
caret_tree_fit_m_m <- caret::train(ACCIDENTES_GRAVES ~ Ano_Base + MES + Feriados,
                                   data = Train_M_Dataset,
                                   method = "rpart", trControl = trcntrl,
                                   parms = list(split = "gini"),
                                   preProcess = c("center", "scale"),
                                   tuneLength = 10)
## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info =
## trainInfo, : There were missing values in resampled performance measures.
caret_tree_fit_m_m
## CART
##
## 48 samples
## 3 predictor
##
## Pre-processing: centered (13), scaled (13)
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 42, 43, 43, 43, 44, 43, ...
## Resampling results across tuning parameters:
##
## cp RMSE Rsquared MAE
## 0.00000000 121.4800 0.19812474 105.0612
## 0.01098192 123.1253 0.17874696 106.0382
## 0.02196385 122.2402 0.17520610 105.0382
## 0.03294577 119.3979 0.22997713 103.0443
## 0.04392770 119.3979 0.22997713 103.0443
## 0.05490962 119.3979 0.22997713 103.0443
## 0.06589155 119.3979 0.22997713 103.0443
## 0.07687347 124.5515 0.09921628 107.2660
## 0.08785540 124.5515 0.09921628 107.2660
## 0.09883732 127.4723 0.05570420 111.3363
##
## RMSE was used to select the optimal model using the smallest value.
## The final value used for the model was cp = 0.06589155.
Calculo MSE y RMSE para los datos de entrenamiento
y_tr_pred_tree_m_m<-predict(caret_tree_fit_m_m,Train_M_Dataset [,c("Ano_Base","MES","Feriados")])
mse_tr_tree_m_m<-mean((Train_M_Dataset $ACCIDENTES_GRAVES-y_tr_pred_tree_m_m)^2) # calcula el mse de entrenamiento
RMSE_tr_tree_m_m = sqrt(mse_tr_tree_m_m)
mse_tr_tree_m_m
## [1] 15023.26
RMSE_tr_tree_m_m
## [1] 122.5694
Calculo MSE y RMSE para los datos de validación
y_test_pred_tree_m_m<-predict(caret_tree_fit_m_m,Test_M_Dataset[,c("Ano_Base","MES","Feriados")])
mse_test_tree_m_m<-mean((Test_M_Dataset$ACCIDENTES_GRAVES-y_test_pred_tree_m_m)^2) # calcula el mse de entrenamiento
RMSE_test_tree_m_m = sqrt(mse_test_tree_m_m)
mse_test_tree_m_m
## [1] 42876.64
RMSE_test_tree_m_m
## [1] 207.0667
Predicción en la muestra
plot_ly (data=Train_M_Dataset ,
x = ~Ano_mes,
y = ~ACCIDENTES_GRAVES,
type = "scatter" ,mode = "lines",
name='Real',
line=list(width=1,color='rgb(205, 12, 24)'))%>%
add_trace(y= ~y_tr_pred_tree_m_m,
name='Modelo tree',
line=list(width=1,color='rgb(22, 96, 167)'))%>%
layout(title='Accidentes graves',
xaxis=list(title="Fecha"),
yaxis=list(title="Accidentes"),
legend = list(x = 1, y = 0.4))
Gráfica serie 2018
plot_ly (data=Test_M_Dataset,
x = ~Ano_mes,
y = ~ACCIDENTES_GRAVES,
type = "scatter" ,mode = "lines",
name='Real',
line=list(width=1,color='rgb(205, 12, 24)'))%>%
add_trace(y= ~y_test_pred_tree_m_m,
name='Modelo tree',
line=list(width=1,color='rgb(22, 96, 167)'))%>%
layout(title='Accidentes graves',
xaxis=list(title="Fecha"),
yaxis=list(title="Accidentes"),
legend = list(x = 1, y = 0.4))
# 10-fold cross-validated CART regression tree for ACCIDENTES_LEVES.
trcntrl <- trainControl(method = "cv", number = 10)
# FIX: the formula used the Unicode tilde "∼" (U+223C); replaced with ASCII "~".
# NOTE(review): parms = list(split = "gini") applies to classification
# trees; for a numeric outcome rpart ignores it — confirm it can be dropped.
caret_tree_fit_m_sd <- caret::train(ACCIDENTES_LEVES ~ Ano_Base + MES + Feriados,
                                    data = Train_M_Dataset,
                                    method = "rpart", trControl = trcntrl,
                                    parms = list(split = "gini"),
                                    preProcess = c("center", "scale"),
                                    tuneLength = 10)
## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info =
## trainInfo, : There were missing values in resampled performance measures.
caret_tree_fit_m_sd
## CART
##
## 48 samples
## 3 predictor
##
## Pre-processing: centered (13), scaled (13)
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 43, 42, 42, 43, 44, 43, ...
## Resampling results across tuning parameters:
##
## cp RMSE Rsquared MAE
## 0.000000000 115.5862 0.3980590 93.95687
## 0.007035088 114.5438 0.3320399 93.25158
## 0.014070176 112.8181 0.3448152 91.51805
## 0.021105264 112.8181 0.3448152 91.51805
## 0.028140352 112.8181 0.3448152 91.51805
## 0.035175440 112.8181 0.3448152 91.51805
## 0.042210528 112.8181 0.3448152 91.51805
## 0.049245616 119.9709 0.2316120 98.13218
## 0.056280704 121.8201 0.2334852 99.05406
## 0.063315792 121.8201 0.2334852 99.05406
##
## RMSE was used to select the optimal model using the smallest value.
## The final value used for the model was cp = 0.04221053.
Calculo MSE y RMSE para los datos de entrenamiento
y_tr_pred_tree_m_sd<-predict(caret_tree_fit_m_sd,Train_M_Dataset [,c("Ano_Base","MES","Feriados")])
mse_tr_tree_m_sd<-mean((Train_M_Dataset $ACCIDENTES_LEVES-y_tr_pred_tree_m_sd)^2) # calcula el mse de entrenamiento
RMSE_tr_tree_m_sd = sqrt(mse_tr_tree_m_sd)
mse_tr_tree_m_sd
## [1] 12301.03
RMSE_tr_tree_m_sd
## [1] 110.91
Calculo MSE y RMSE para los datos de validación
y_test_pred_tree_m_sd<-predict(caret_tree_fit_m_sd,Test_M_Dataset[,c("Ano_Base","MES","Feriados")])
mse_test_tree_m_sd<-mean((Test_M_Dataset$ACCIDENTES_LEVES-y_test_pred_tree_m_sd)^2) # calcula el mse de entrenamiento
RMSE_test_tree_m_sd = sqrt(mse_test_tree_m_sd)
mse_test_tree_m_sd
## [1] 14733.52
RMSE_test_tree_m_sd
## [1] 121.3817
Predicción en la muestra
plot_ly (data=Train_M_Dataset ,
x = ~Ano_mes,
y = ~ACCIDENTES_LEVES,
type = "scatter" ,mode = "lines",
name='Real',
line=list(width=1,color='rgb(205, 12, 24)'))%>%
add_trace(y= ~y_tr_pred_tree_m_sd,
name='Modelo tree',
line=list(width=1,color='rgb(22, 96, 167)'))%>%
layout(title='Total accidentes leves',
xaxis=list(title="Fecha"),
yaxis=list(title="Accidentes leves"),
legend = list(x = 1, y = 0.4))
Gráfica serie 2018
plot_ly (data=Test_M_Dataset,
x = ~Ano_mes,
y = ~ACCIDENTES_LEVES,
type = "scatter" ,mode = "lines",
name='Real',
line=list(width=1,color='rgb(205, 12, 24)'))%>%
add_trace(y= ~y_test_pred_tree_m_sd,
name='Modelo tree',
line=list(width=1,color='rgb(22, 96, 167)'))%>%
layout(title='Total accidentes leves',
xaxis=list(title="Fecha"),
yaxis=list(title="Accidentes leves"),
legend = list(x = 1, y = 0.4))
# Summary table of training/validation RMSE for the three tree models.
Tipo_de_accidentes <- c("Total Accidentes", "Accidentes Graves", "Accidentes Leves")
RMSE_Train_tree <- round(c(RMSE_tr_tree_m, RMSE_tr_tree_m_m, RMSE_tr_tree_m_sd), 3)
RMSE_Test_tree <- round(c(RMSE_test_tree_m, RMSE_test_tree_m_m, RMSE_test_tree_m_sd), 3)
# FIX: data.frame(cbind(...)) coerces the numeric RMSE columns to character
# via an intermediate character matrix; pass the vectors directly.
Tabla_tree <- data.frame(Tipo_de_accidentes, RMSE_Train_tree, RMSE_Test_tree)
Tabla_tree
## Tipo_de_accidentes RMSE_Train_tree RMSE_Test_tree
## 1 Total Accidentes 204.392 247.03
## 2 Accidentes Graves 122.569 207.067
## 3 Accidentes Leves 110.91 121.382
# 10-fold cross-validated random forest for TOTAL_ACCIDENTES.
trcntrl <- trainControl(method = "cv", number = 10)
# FIX: the formula used the Unicode tilde "∼" (U+223C), which R's parser
# rejects; replaced with the ASCII "~".
caret_rf_fit_s <- caret::train(TOTAL_ACCIDENTES ~ Ano_Base + MES + Feriados,
                               data = Train_M_Dataset,
                               method = "rf", trControl = trcntrl,
                               prox = TRUE, allowParallel = TRUE)
summary(caret_rf_fit_s)
summary(caret_rf_fit_s)
## Length Class Mode
## call 6 -none- call
## type 1 -none- character
## predicted 48 -none- numeric
## mse 500 -none- numeric
## rsq 500 -none- numeric
## oob.times 48 -none- numeric
## importance 13 -none- numeric
## importanceSD 0 -none- NULL
## localImportance 0 -none- NULL
## proximity 2304 -none- numeric
## ntree 1 -none- numeric
## mtry 1 -none- numeric
## forest 11 -none- list
## coefs 0 -none- NULL
## y 48 -none- numeric
## test 0 -none- NULL
## inbag 0 -none- NULL
## xNames 13 -none- character
## problemType 1 -none- character
## tuneValue 1 data.frame list
## obsLevels 1 -none- logical
## param 2 -none- list
caret_rf_fit_s
## Random Forest
##
## 48 samples
## 3 predictor
##
## No pre-processing
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 42, 45, 44, 43, 43, 43, ...
## Resampling results across tuning parameters:
##
## mtry RMSE Rsquared MAE
## 2 173.4228 0.5547127 141.9057
## 7 161.5589 0.5967454 142.7779
## 13 161.0391 0.5736748 141.5929
##
## RMSE was used to select the optimal model using the smallest value.
## The final value used for the model was mtry = 13.
plot(caret_rf_fit_s)
Calculo MSE y RMSE para los datos de entrenamiento
y_tr_pred_rf_s<-predict(caret_rf_fit_s,Train_M_Dataset [,c("Ano_Base","MES","Feriados")])
mse_tr_rf_s<-mean((Train_M_Dataset $TOTAL_ACCIDENTES-y_tr_pred_rf_s)^2) # calcula el mse de entrenamiento
RMSE_tr_rf_s = sqrt(mse_tr_rf_s)
mse_tr_rf_s
## [1] 8425.437
RMSE_tr_rf_s
## [1] 91.79018
Calculo MSE y RMSE para los datos de validación
y_test_pred_rf_s<-predict(caret_rf_fit_s,Test_M_Dataset[,c("Ano_Base","MES","Feriados")])
mse_test_rf_s<-mean((Test_M_Dataset$TOTAL_ACCIDENTES-y_test_pred_rf_s)^2) # calcula el mse de entrenamiento
RMSE_test_rf_s = sqrt(mse_test_rf_s)
mse_test_rf_s
## [1] 31896.72
RMSE_test_rf_s
## [1] 178.5965
Predicción en la muestra
plot_ly (data=Train_M_Dataset ,
x = ~Ano_mes,
y = ~TOTAL_ACCIDENTES,
type = "scatter" ,mode = "lines",
name='Real',
line=list(width=1,color='rgb(205, 12, 24)'))%>%
add_trace(y= ~y_tr_pred_rf_s,
name='Modelo rf',
line=list(width=1,color='rgb(22, 96, 167)'))%>%
layout(title='Total accidentes',
xaxis=list(title="Fecha"),
yaxis=list(title="Accidentes"),
legend = list(x = 1, y = 0.4))
Gráfica serie 2018
plot_ly (data=Test_M_Dataset,
x = ~Ano_mes,
y = ~TOTAL_ACCIDENTES,
type = "scatter" ,mode = "lines",
name='Real',
line=list(width=1,color='rgb(205, 12, 24)'))%>%
add_trace(y= ~y_test_pred_rf_s,
name='Modelo rf',
line=list(width=1,color='rgb(22, 96, 167)'))%>%
layout(title='Total accidentes',
xaxis=list(title="Fecha"),
yaxis=list(title="Accidentes"),
legend = list(x = 1, y = 0.4))
# 10-fold cross-validated random forest for ACCIDENTES_GRAVES.
trcntrl <- trainControl(method = "cv", number = 10)
# FIX: the formula used the Unicode tilde "∼" (U+223C), which R's parser
# rejects; replaced with the ASCII "~".
caret_rf_fit_s_m <- caret::train(ACCIDENTES_GRAVES ~ Ano_Base + MES + Feriados,
                                 data = Train_M_Dataset,
                                 method = "rf", trControl = trcntrl,
                                 prox = TRUE, allowParallel = TRUE)
summary(caret_rf_fit_s_m)
## Length Class Mode
## call 6 -none- call
## type 1 -none- character
## predicted 48 -none- numeric
## mse 500 -none- numeric
## rsq 500 -none- numeric
## oob.times 48 -none- numeric
## importance 13 -none- numeric
## importanceSD 0 -none- NULL
## localImportance 0 -none- NULL
## proximity 2304 -none- numeric
## ntree 1 -none- numeric
## mtry 1 -none- numeric
## forest 11 -none- list
## coefs 0 -none- NULL
## y 48 -none- numeric
## test 0 -none- NULL
## inbag 0 -none- NULL
## xNames 13 -none- character
## problemType 1 -none- character
## tuneValue 1 data.frame list
## obsLevels 1 -none- logical
## param 2 -none- list
caret_rf_fit_s_m
## Random Forest
##
## 48 samples
## 3 predictor
##
## No pre-processing
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 43, 44, 43, 43, 44, 42, ...
## Resampling results across tuning parameters:
##
## mtry RMSE Rsquared MAE
## 2 97.85408 0.6040367 83.56640
## 7 84.13450 0.6419625 72.23023
## 13 84.80614 0.6285747 71.72686
##
## RMSE was used to select the optimal model using the smallest value.
## The final value used for the model was mtry = 7.
plot(caret_rf_fit_s_m)
Calculo MSE y RMSE para los datos de entrenamiento
y_tr_pred_rf_s_m<-predict(caret_rf_fit_s_m,Train_M_Dataset [,c("Ano_Base","MES","Feriados")])
mse_tr_rf_s_m<-mean((Train_M_Dataset $ACCIDENTES_GRAVES-y_tr_pred_rf_s_m)^2) # calcula el mse de entrenamiento
RMSE_tr_rf_s_m = sqrt(mse_tr_rf_s_m)
mse_tr_rf_s_m
## [1] 4075.912
RMSE_tr_rf_s_m
## [1] 63.84287
Calculo MSE y RMSE para los datos de validación
y_test_pred_rf_s_m<-predict(caret_rf_fit_s_m,Test_M_Dataset[,c("Ano_Base","MES","Feriados")])
mse_test_rf_s_m<-mean((Test_M_Dataset$ACCIDENTES_GRAVES-y_test_pred_rf_s_m)^2) # calcula el mse de entrenamiento
RMSE_test_rf_s_m = sqrt(mse_test_rf_s_m)
mse_test_rf_s_m
## [1] 41209.68
RMSE_test_rf_s_m
## [1] 203.0017
Predicción en la muestra
plot_ly (data=Train_M_Dataset ,
x = ~Ano_mes,
y = ~ACCIDENTES_GRAVES,
type = "scatter" ,mode = "lines",
name='Real',
line=list(width=1,color='rgb(205, 12, 24)'))%>%
add_trace(y= ~y_tr_pred_rf_s_m,
name='Modelo rf',
line=list(width=1,color='rgb(22, 96, 167)'))%>%
layout(title='Total accidentes graves',
xaxis=list(title="Fecha"),
yaxis=list(title="Accidentes graves"),
legend = list(x = 1, y = 0.4))
Gráfica serie 2018
plot_ly (data=Test_M_Dataset,
x = ~Ano_mes,
y = ~ACCIDENTES_GRAVES,
type = "scatter" ,mode = "lines",
name='Real',
line=list(width=1,color='rgb(205, 12, 24)'))%>%
add_trace(y= ~y_test_pred_rf_s_m,
name='Modelo rf',
line=list(width=1,color='rgb(22, 96, 167)'))%>%
layout(title='Total accidentes graves',
xaxis=list(title="Fecha"),
yaxis=list(title="Accidentes graves"),
legend = list(x = 1, y = 0.4))
# 10-fold cross-validated random forest for ACCIDENTES_LEVES.
trcntrl <- trainControl(method = "cv", number = 10)
# FIX: the formula used the Unicode tilde "∼" (U+223C), which R's parser
# rejects; replaced with the ASCII "~".
caret_rf_fit_s_sd <- caret::train(ACCIDENTES_LEVES ~ Ano_Base + MES + Feriados,
                                  data = Train_M_Dataset,
                                  method = "rf", trControl = trcntrl,
                                  prox = TRUE, allowParallel = TRUE)
summary(caret_rf_fit_s_sd)
## Length Class Mode
## call 6 -none- call
## type 1 -none- character
## predicted 48 -none- numeric
## mse 500 -none- numeric
## rsq 500 -none- numeric
## oob.times 48 -none- numeric
## importance 13 -none- numeric
## importanceSD 0 -none- NULL
## localImportance 0 -none- NULL
## proximity 2304 -none- numeric
## ntree 1 -none- numeric
## mtry 1 -none- numeric
## forest 11 -none- list
## coefs 0 -none- NULL
## y 48 -none- numeric
## test 0 -none- NULL
## inbag 0 -none- NULL
## xNames 13 -none- character
## problemType 1 -none- character
## tuneValue 1 data.frame list
## obsLevels 1 -none- logical
## param 2 -none- list
caret_rf_fit_s_sd
## Random Forest
##
## 48 samples
## 3 predictor
##
## No pre-processing
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 43, 43, 43, 45, 42, 43, ...
## Resampling results across tuning parameters:
##
## mtry RMSE Rsquared MAE
## 2 100.95042 0.426075 83.97683
## 7 95.99575 0.468268 82.06022
## 13 95.88647 0.472629 81.18917
##
## RMSE was used to select the optimal model using the smallest value.
## The final value used for the model was mtry = 13.
plot(caret_rf_fit_s_sd)
Calculo MSE y RMSE para los datos de entrenamiento
y_tr_pred_rf_s_sd<-predict(caret_rf_fit_s_sd,Train_M_Dataset [,c("Ano_Base","MES","Feriados")])
mse_tr_rf_s_sd<-mean((Train_M_Dataset $ACCIDENTES_LEVES-y_tr_pred_rf_s_sd)^2) # calcula el mse de entrenamiento
RMSE_tr_rf_s_sd = sqrt(mse_tr_rf_s_sd)
mse_tr_rf_s_sd
## [1] 4087.071
RMSE_tr_rf_s_sd
## [1] 63.93021
Calculo MSE y RMSE para los datos de validación
y_test_pred_rf_s_sd<-predict(caret_rf_fit_s_sd,Test_M_Dataset[,c("Ano_Base","MES","Feriados")])
mse_test_rf_s_sd<-mean((Test_M_Dataset$ACCIDENTES_LEVES-y_test_pred_rf_s_sd)^2) # calcula el mse de entrenamiento
RMSE_test_rf_s_sd = sqrt(mse_test_rf_s_sd)
mse_test_rf_s_sd
## [1] 4385.53
RMSE_test_rf_s_sd
## [1] 66.22333
Predicción en la muestra
plot_ly (data=Train_M_Dataset ,
x = ~Ano_mes,
y = ~ACCIDENTES_LEVES,
type = "scatter" ,mode = "lines",
name='Real',
line=list(width=1,color='rgb(205, 12, 24)'))%>%
add_trace(y= ~y_tr_pred_rf_s_sd,
name='Modelo rf',
line=list(width=1,color='rgb(22, 96, 167)'))%>%
layout(title='Total accidentes leves',
xaxis=list(title="Fecha"),
yaxis=list(title="Accidentes leves"),
legend = list(x = 1, y = 0.4))
Gráfica serie 2018
plot_ly (data=Test_M_Dataset,
x = ~Ano_mes,
y = ~ACCIDENTES_LEVES,
type = "scatter" ,mode = "lines",
name='Real',
line=list(width=1,color='rgb(205, 12, 24)'))%>%
add_trace(y= ~y_test_pred_rf_s_sd,
name='Modelo rf',
line=list(width=1,color='rgb(22, 96, 167)'))%>%
layout(title='Total accidentes leves',
xaxis=list(title="Fecha"),
yaxis=list(title="Accidentes leves"),
legend = list(x = 1, y = 0.4))
# Summary table of training/validation RMSE for the three random-forest models.
# FIX: labels made consistent with the glm/tree summary tables above
# ("Accidentes Graves"/"Accidentes Leves" instead of "Total Graves"/"Total Leves").
Tipo_de_accidentes <- c("Total Accidentes", "Accidentes Graves", "Accidentes Leves")
RMSE_Train_rf <- round(c(RMSE_tr_rf_s, RMSE_tr_rf_s_m, RMSE_tr_rf_s_sd), 3)
RMSE_Test_rf <- round(c(RMSE_test_rf_s, RMSE_test_rf_s_m, RMSE_test_rf_s_sd), 3)
# FIX: data.frame(cbind(...)) coerces the numeric RMSE columns to character
# via an intermediate character matrix; pass the vectors directly.
Tabla_rf <- data.frame(Tipo_de_accidentes, RMSE_Train_rf, RMSE_Test_rf)
Tabla_rf
## Tipo_de_accidentes RMSE_Train_rf RMSE_Test_rf
## 1 Total Accidentes 91.79 178.597
## 2 Total Graves 63.843 203.002
## 3 Total Leves 63.93 66.223
Comparación en el entrenamiento
comparacion_tr<-data.frame(Ano_mes=Total_Dataset_Freq_M$Ano_mes[Total_Dataset_Freq_M$Ano_mes<="2017_12"],
ACCIDENTES=Total_Dataset_Freq_M$TOTAL_ACCIDENTES[Total_Dataset_Freq_M$Ano_mes<="2017_12"],
lm= y_tr_pred_lm_m,
knn= y_tr_pred_knn_m,
glm=y_tr_pred_glm_m ,
arbol=y_tr_pred_tree_m,
rf=y_tr_pred_rf_s)
plot_ly (data=comparacion_tr,
x = ~Ano_mes,
y = ~ACCIDENTES,
type = "scatter" ,mode = "lines",
name='Real',
line=list(width=1,color='rgb(205, 12, 24)'))%>%
add_trace(y= ~lm,
name='lm',
line=list(width=1,color= "blue"))%>%
add_trace(y= ~knn,
name='knn',
line=list(width=1,color="red"))%>%
add_trace(y= ~glm,
name='Modelo Poisson',
line=list(width=1,color='rgb(22, 96, 167)'))%>%
add_trace(y= ~arbol,
name='Árbol',
line=list(width=1,color="green"))%>%
add_trace(y= ~rf,
name='Bosque',
line=list(width=1,color='rgb(255, 51, 153)'))%>%
layout(title='Total Accidentes (Entrenamiento)',
xaxis=list(title="Fecha"),
yaxis=list(title="Unidades"),
legend = list(x = 1, y = 0.9))
Comparación en la validación
comparacion_vl<-data.frame(Ano_mes=Test_M_Dataset$Ano_mes,
ACCIDENTES=Test_M_Dataset$TOTAL_ACCIDENTES,
lm= y_test_pred_lm_m,
knn= y_test_pred_knn_m,
glm=y_test_pred_glm_m ,
arbol=y_test_pred_tree_m,
rf=y_test_pred_rf_s)
plot_ly (data=comparacion_vl,
x = ~Ano_mes,
y = ~ACCIDENTES,
type = "scatter" ,mode = "lines",
name='Real',
line=list(width=1,color='rgb(205, 12, 24)'))%>%
add_trace(y= ~lm,
name='lm',
line=list(width=1,color= "blue"))%>%
add_trace(y= ~knn,
name='knn',
line=list(width=1,color="red"))%>%
add_trace(y= ~glm,
name='Modelo Poisson',
line=list(width=1,color='rgb(22, 96, 167)'))%>%
add_trace(y= ~arbol,
name='Árbol',
line=list(width=1,color="green"))%>%
add_trace(y= ~rf,
name='Bosque',
line=list(width=1,color='rgb(255, 51, 153)'))%>%
layout(title='Total Accidentes (Validación)',
xaxis=list(title="Fecha"),
yaxis=list(title="Accidentes"),
legend = list(x = 1, y = 0.9))
Entrenamiento<-round(c(RMSE_tr_lm_m,RMSE_tr_knn_m,RMSE_tr_glm_m,RMSE_tr_tree_m,RMSE_tr_rf_s),3)
Validacion<-round(c(RMSE_test_lm_m,RMSE_test_knn_m,RMSE_test_glm_m,RMSE_test_tree_m,RMSE_test_rf_s),3)
nombres<-c("lm","knn","glm","árbol","bosque")
ResultadosRMSE<-data.frame(Entrenamiento=Entrenamiento,Validacion=Validacion)
rownames(ResultadosRMSE)<-nombres
Cálculo de la variación
ResultadosRMSE$Por_variacion<-((ResultadosRMSE$Validacion-ResultadosRMSE$Entrenamiento)/ResultadosRMSE$Entrenamiento)*100
ResultadosRMSE
## Entrenamiento Validacion Por_variacion
## lm 83.919 205.348 144.69786
## knn 216.306 177.615 -17.88716
## glm 83.803 110.882 32.31269
## árbol 204.392 247.030 20.86089
## bosque 91.790 178.597 94.57130
Comparación en el entrenamiento
comparacion_tr<-data.frame(Ano_mes=Total_Dataset_Freq_M$Ano_mes[Total_Dataset_Freq_M$Ano_mes<="2017_12"],
ACCIDENTESG=Total_Dataset_Freq_M$ACCIDENTES_GRAVES[Total_Dataset_Freq_M$Ano_mes<="2017_12"],
lm= y_tr_pred_lm_m_m,
knn= y_tr_pred_knn_m_m,
glm=y_tr_pred_glm_m_m,
arbol=y_tr_pred_tree_m_m,
rf=y_tr_pred_rf_s_m)
# Training-period comparison of all five models for serious accidents.
plot_ly(data = comparacion_tr,
        x = ~Ano_mes,
        y = ~ACCIDENTESG,
        type = "scatter", mode = "lines",
        name = 'Real',
        line = list(width = 1, color = 'rgb(205, 12, 24)')) %>%
  add_trace(y = ~lm,
            name = 'lm',
            line = list(width = 1, color = "blue")) %>%
  add_trace(y = ~knn,
            name = 'knn',
            line = list(width = 1, color = "red")) %>%
  add_trace(y = ~glm,
            name = 'Modelo Poisson',
            line = list(width = 1, color = 'rgb(22, 96, 167)')) %>%
  # FIX: legend label was the truncated 'Árbo'; corrected to 'Árbol',
  # matching the label already used in the validation plot.
  add_trace(y = ~arbol,
            name = 'Árbol',
            line = list(width = 1, color = "green")) %>%
  add_trace(y = ~rf,
            name = 'Bosque',
            line = list(width = 1, color = 'rgb(255, 51, 153)')) %>%
  layout(title = 'Accidentes graves (Entrenamiento)',
         xaxis = list(title = "Fecha"),
         yaxis = list(title = "Accidentes"),
         legend = list(x = 1, y = 0.9))
Comparación en la validación
comparacion_vl<-data.frame(Ano_mes=Test_M_Dataset$Ano_mes,
ACCIDENTESG=Test_M_Dataset$ACCIDENTES_GRAVES,
lm= y_test_pred_lm_m_m,
knn= y_test_pred_knn_m_m,
glm=y_test_pred_glm_m_m,
arbol=y_test_pred_tree_m_m,
rf=y_test_pred_rf_s_m)
plot_ly (data=comparacion_vl,
x = ~Ano_mes,
y = ~ACCIDENTESG,
type = "scatter" ,mode = "lines",
name='Real',
line=list(width=1,color='rgb(205, 12, 24)'))%>%
add_trace(y= ~lm,
name='lm',
line=list(width=1,color= "blue"))%>%
add_trace(y= ~knn,
name='knn',
line=list(width=1,color="red"))%>%
add_trace(y= ~glm,
name='Modelo Poisson',
line=list(width=1,color='rgb(22, 96, 167)'))%>%
add_trace(y= ~arbol,
name='Árbol',
line=list(width=1,color="green"))%>%
add_trace(y= ~rf,
name='Bosque',
line=list(width=1,color='rgb(255, 51, 153)'))%>%
layout(title='Accidentes graves (Validación)',
xaxis=list(title="Fecha"),
yaxis=list(title="Accidentes"),
legend = list(x = 1, y = 0.9))
Entrenamiento<-round(c(RMSE_tr_lm_m_m,RMSE_tr_knn_m_m,RMSE_tr_glm_m_m,RMSE_tr_tree_m_m,RMSE_tr_rf_s_m),3)
Validacion<-round(c(RMSE_test_lm_m_m,RMSE_test_knn_m_m,RMSE_test_glm_m_m,RMSE_test_tree_m_m,RMSE_test_rf_s_m),3)
nombres<-c("lm","knn","glm","árbol","bosque")
ResultadosRMSE<-data.frame(Entrenamiento=Entrenamiento,Validacion=Validacion)
rownames(ResultadosRMSE)<-nombres
Cálculo de la variación
ResultadosRMSE$Variacion<-((ResultadosRMSE$Validacion-ResultadosRMSE$Entrenamiento)/ResultadosRMSE$Entrenamiento)*100
ResultadosRMSE
## Entrenamiento Validacion Variacion
## lm 65.964 201.341 205.22861
## knn 128.533 164.804 28.21921
## glm 65.243 72.043 10.42257
## árbol 122.569 207.067 68.93913
## bosque 63.843 203.002 217.97065
Comparación en el entrenamiento
comparacion_tr<-data.frame(Ano_mes=Total_Dataset_Freq_M$Ano_mes[Total_Dataset_Freq_M$Ano_mes<="2017_12"],
ACCIDENTESL=Total_Dataset_Freq_M$ACCIDENTES_LEVES[Total_Dataset_Freq_M$Ano_mes<="2017_12"],
lm= y_tr_pred_lm_m_sd,
knn= y_tr_pred_knn_m_sd,
glm=y_tr_pred_glm_m_sd,
arbol=y_tr_pred_tree_m_sd,
rf=y_tr_pred_rf_s_sd)
# Training-period comparison of all five models for minor accidents.
plot_ly(data = comparacion_tr,
        x = ~Ano_mes,
        y = ~ACCIDENTESL,
        type = "scatter", mode = "lines",
        name = 'Real',
        line = list(width = 1, color = 'rgb(205, 12, 24)')) %>%
  add_trace(y = ~lm,
            name = 'lm',
            line = list(width = 1, color = "blue")) %>%
  add_trace(y = ~knn,
            name = 'knn',
            line = list(width = 1, color = "red")) %>%
  add_trace(y = ~glm,
            name = 'Modelo Poisson',
            line = list(width = 1, color = 'rgb(22, 96, 167)')) %>%
  # FIX: legend label was the truncated 'Árbo'; corrected to 'Árbol',
  # matching the label already used in the validation plot.
  add_trace(y = ~arbol,
            name = 'Árbol',
            line = list(width = 1, color = "green")) %>%
  add_trace(y = ~rf,
            name = 'Bosque',
            line = list(width = 1, color = 'rgb(255, 51, 153)')) %>%
  layout(title = 'Accidentes leves (Entrenamiento)',
         xaxis = list(title = "Fecha"),
         yaxis = list(title = "Accidentes"),
         legend = list(x = 1, y = 0.9))
Comparación en la validación
# Validation-set comparison table for MINOR accidents.
# Fix: the observed column ACCIDENTESL was filled from ACCIDENTES_GRAVES — a
# copy-paste from the serious-accidents block above — although this section
# evaluates the minor-accident (_sd) models; use ACCIDENTES_LEVES instead.
comparacion_vl<-data.frame(Ano_mes=Test_M_Dataset$Ano_mes,
ACCIDENTESL=Test_M_Dataset$ACCIDENTES_LEVES,
lm= y_test_pred_lm_m_sd,
knn= y_test_pred_knn_m_sd,
glm=y_test_pred_glm_m_sd,
arbol=y_test_pred_tree_m_sd,
rf=y_test_pred_rf_s_sd)
# Line chart of observed vs. predicted minor accidents on the validation
# sample. Fix: the title read 'Accidentes lesves' (typo) — corrected to
# 'Accidentes leves' to match the rest of the document.
plot_ly (data=comparacion_vl,
x = ~Ano_mes,
y = ~ACCIDENTESL,
type = "scatter" ,mode = "lines",
name='Real',
line=list(width=1,color='rgb(205, 12, 24)'))%>%
add_trace(y= ~lm,
name='lm',
line=list(width=1,color= "blue"))%>%
add_trace(y= ~knn,
name='knn',
line=list(width=1,color="red"))%>%
add_trace(y= ~glm,
name='Modelo Poisson',
line=list(width=1,color='rgb(22, 96, 167)'))%>%
add_trace(y= ~arbol,
name='Árbol',
line=list(width=1,color="green"))%>%
add_trace(y= ~rf,
name='Bosque',
line=list(width=1,color='rgb(255, 51, 153)'))%>%
layout(title='Accidentes leves (Validación)',
xaxis=list(title="Fecha"),
yaxis=list(title="Accidentes"),
legend = list(x = 1, y = 0.9))
# Collect training and validation RMSE for each minor-accident (_sd) model,
# rounded to 3 decimals, into a single comparison table.
rmse_tr_sd <- round(c(RMSE_tr_lm_m_sd, RMSE_tr_knn_m_sd, RMSE_tr_glm_m_sd,
                      RMSE_tr_tree_m_sd, RMSE_tr_rf_s_sd), 3)
rmse_vl_sd <- round(c(RMSE_test_lm_m_sd, RMSE_test_knn_m_sd, RMSE_test_glm_m_sd,
                      RMSE_test_tree_m_sd, RMSE_test_rf_s_sd), 3)
ResultadosRMSE <- data.frame(
  Entrenamiento = rmse_tr_sd,
  Validacion    = rmse_vl_sd,
  row.names     = c("lm", "knn", "glm", "árbol", "bosque")
)
Cálculo de la variación
# Percentage increase of RMSE from training to validation for the minor-
# accident models.
ResultadosRMSE$Variacion <-
  100 * (ResultadosRMSE$Validacion - ResultadosRMSE$Entrenamiento) /
  ResultadosRMSE$Entrenamiento
ResultadosRMSE
## Entrenamiento Validacion Variacion
## lm 59.583 66.603 11.781884
## knn 114.519 125.714 9.775670
## glm 59.309 74.353 25.365459
## árbol 110.910 121.382 9.441890
## bosque 63.930 66.223 3.586735
Teniendo como criterio el mínimo RMSE en la muestra de validación se eligen los siguientes modelos:
Se ajusta el modelo con todos los datos desde el 01-01-2014 al 31-12-2018
# Final Poisson GLM for TOTAL monthly accidents, refitted on the full
# 2014-2018 data with the variables selected during validation.
# Fix: the formula used the Unicode similarity sign '∼' (U+223C) instead of
# the ASCII tilde '~'; R cannot parse U+223C as the formula operator.
glm_fit_m_final <- glm(TOTAL_ACCIDENTES ~ Ano_Base + MES + Feriados,
                       data = Total_Dataset_Freq_M, family = "poisson")
summary(glm_fit_m_final)
##
## Call:
## glm(formula = TOTAL_ACCIDENTES ~ Ano_Base + MES + Feriados, family = "poisson",
## data = Total_Dataset_Freq_M)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -3.6894 -1.3880 0.0389 1.4887 3.7186
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 8.050094 0.012016 669.974 < 2e-16 ***
## Ano_Base 0.003067 0.004468 0.686 0.492
## MES02 0.068058 0.014110 4.823 1.41e-06 ***
## MES03 0.172526 0.011085 15.564 < 2e-16 ***
## MES04 0.115619 0.011700 9.882 < 2e-16 ***
## MES05 0.189865 0.011013 17.240 < 2e-16 ***
## MES06 0.111156 0.011254 9.877 < 2e-16 ***
## MES07 0.163301 0.011193 14.589 < 2e-16 ***
## MES08 0.215948 0.010949 19.724 < 2e-16 ***
## MES09 0.152463 0.013939 10.938 < 2e-16 ***
## MES10 0.161179 0.011834 13.621 < 2e-16 ***
## MES11 0.118019 0.011197 10.540 < 2e-16 ***
## MES12 0.150864 0.011112 13.577 < 2e-16 ***
## Feriados -0.020655 0.004278 -4.828 1.38e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 834.17 on 59 degrees of freedom
## Residual deviance: 191.60 on 46 degrees of freedom
## AIC: 819.23
##
## Number of Fisher Scoring iterations: 3
Se guarda el modelo en un objeto de R
# Persist the total-accidents model to disk and reload it under the name
# used by the forecasting step below.
ruta_modelo_total <- "../Modelos/Prediccion_Total_Mensual.rds"
saveRDS(glm_fit_m_final, ruta_modelo_total)
Modelo_Total_mensual <- readRDS(ruta_modelo_total)
Se ajusta el modelo con todos los datos desde el 01-01-2014 al 31-12-2018
# Final Poisson GLM for SERIOUS monthly accidents, refitted on the full
# 2014-2018 data.
# Fix: same as the total model — the Unicode '∼' (U+223C) is replaced with
# the ASCII tilde '~' so the formula parses.
glm_fit_m_m_final <- glm(ACCIDENTES_GRAVES ~ Ano_Base + MES + Feriados,
                         data = Total_Dataset_Freq_M, family = "poisson")
summary(glm_fit_m_m_final)
##
## Call:
## glm(formula = ACCIDENTES_GRAVES ~ Ano_Base + MES + Feriados,
## family = "poisson", data = Total_Dataset_Freq_M)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -5.4227 -1.3795 0.0799 1.2927 4.6366
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 7.491875 0.016027 467.444 < 2e-16 ***
## Ano_Base -0.023352 0.006002 -3.890 0.00010 ***
## MES02 0.050832 0.018871 2.694 0.00707 **
## MES03 0.165011 0.014738 11.196 < 2e-16 ***
## MES04 0.087419 0.015622 5.596 2.20e-08 ***
## MES05 0.161570 0.014706 10.987 < 2e-16 ***
## MES06 0.093435 0.014997 6.230 4.66e-10 ***
## MES07 0.141215 0.014938 9.453 < 2e-16 ***
## MES08 0.195420 0.014593 13.391 < 2e-16 ***
## MES09 0.133319 0.018644 7.151 8.63e-13 ***
## MES10 0.115258 0.015882 7.257 3.95e-13 ***
## MES11 0.063384 0.015051 4.211 2.54e-05 ***
## MES12 0.079161 0.014994 5.280 1.29e-07 ***
## Feriados -0.016373 0.005740 -2.853 0.00434 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 648.15 on 59 degrees of freedom
## Residual deviance: 297.73 on 46 degrees of freedom
## AIC: 889.58
##
## Number of Fisher Scoring iterations: 3
Se guarda el modelo en un objeto de R
# Persist the serious-accidents model and reload it for the forecasting step.
ruta_modelo_grave <- "../Modelos/Prediccion_Grave_Mensual.rds"
saveRDS(glm_fit_m_m_final, ruta_modelo_grave)
Modelo_Grave_mensual <- readRDS(ruta_modelo_grave)
Se ajusta el modelo con todos los datos desde el 01-01-2014 al 31-12-2018
# Final linear model for MINOR monthly accidents, trained with caret using
# 10-fold cross-validation on centred/scaled predictors over the full
# 2014-2018 data.
# Fixes: the formula used the Unicode '∼' (U+223C) instead of the ASCII
# tilde '~', and '=' was used for top-level assignment instead of '<-'.
trcntrl <- trainControl(method = "cv", number = 10)
caret_lm_fit_m_sd_final <- caret::train(
  ACCIDENTES_LEVES ~ Ano_Base + MES + Feriados,
  data = Total_Dataset_Freq_M,
  method = "lm", trControl = trcntrl,
  preProcess = c("center", "scale"),
  tuneLength = 10
)
summary(caret_lm_fit_m_sd_final)
##
## Call:
## lm(formula = .outcome ~ ., data = dat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -130.356 -34.185 0.716 41.016 134.546
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1566.217 8.752 178.952 < 2e-16 ***
## Ano_Base 27.276 8.833 3.088 0.00341 **
## MES02 33.320 15.314 2.176 0.03474 *
## MES03 72.649 11.989 6.060 2.36e-07 ***
## MES04 59.539 12.549 4.744 2.06e-05 ***
## MES05 91.921 11.950 7.692 8.45e-10 ***
## MES06 52.216 11.989 4.355 7.36e-05 ***
## MES07 76.734 12.103 6.340 8.94e-08 ***
## MES08 99.447 11.950 8.322 9.97e-11 ***
## MES09 71.226 15.314 4.651 2.81e-05 ***
## MES10 89.266 12.874 6.934 1.15e-08 ***
## MES11 73.972 11.950 6.190 1.50e-07 ***
## MES12 97.440 11.950 8.154 1.76e-10 ***
## Feriados -36.197 15.652 -2.313 0.02527 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 67.79 on 46 degrees of freedom
## Multiple R-squared: 0.7544, Adjusted R-squared: 0.685
## F-statistic: 10.87 on 13 and 46 DF, p-value: 4.754e-10
Se guarda el modelo en un objeto de R
# Persist the minor-accidents model and reload it for the forecasting step.
ruta_modelo_leves <- "../Modelos/Prediccion_leves_Mensual.rds"
saveRDS(caret_lm_fit_m_sd_final, ruta_modelo_leves)
Modelo_leves_mensual <- readRDS(ruta_modelo_leves)
Se organizan los datos necesarios para el pronóstico de los accidentes en los años 2019, 2020 y 2021
Importación de los datos
# Build the predictor table for the 2019-2021 monthly forecasts from the
# special-days dataset.
load("../data/Dias_Especiales_Mensuales.Rda")
datos_pronostico_mensual <- Dias_Especiales_Mensuales[, c("ANO", "Ano_Base", "MES", "Feriados")]
# Duplicate the key columns so the originals survive the unite() below,
# which removes its input columns by default.
datos_pronostico_mensual$ANO1 <- datos_pronostico_mensual$ANO
datos_pronostico_mensual$MES1 <- datos_pronostico_mensual$MES
library(dplyr)
# Fix: unite_() is deprecated in tidyr; unite() with bare column names
# produces the same "YYYY_MM" key (default separator is "_").
datos_pronostico_mensual <- unite(datos_pronostico_mensual, "Ano_Mes", ANO1, MES1)
Predicción del Total de accidentes con el modelo de regresión lineal generalizado
# Forecast total monthly accidents with the saved Poisson GLM; type="response"
# returns expected counts rather than the linear predictor.
entradas_total <- datos_pronostico_mensual[, c("ANO", "Ano_Base", "MES", "Feriados")]
datos_pronostico_mensual$prediccion_Total_m <- predict(Modelo_Total_mensual, entradas_total, type = "response")
Predicción de accidentes graves con el modelo de regresión lineal generalizado
# Forecast serious monthly accidents with the saved Poisson GLM on the
# response (count) scale.
entradas_graves <- datos_pronostico_mensual[, c("ANO", "Ano_Base", "MES", "Feriados")]
datos_pronostico_mensual$prediccion_Graves_m <- predict(Modelo_Grave_mensual, entradas_graves, type = "response")
Predicción de accidentes leves con el modelo de regresión lineal
# Forecast minor monthly accidents with the saved caret linear model
# (no type argument: lm predictions are already on the response scale).
entradas_leves <- datos_pronostico_mensual[, c("ANO", "Ano_Base", "MES", "Feriados")]
datos_pronostico_mensual$prediccion_Leves_m <- predict(Modelo_leves_mensual, entradas_leves)
Se guardan los datos de pronóstico en un objeto de r
# Store the forecast table as an .Rda for the reporting stage.
save(datos_pronostico_mensual,file="../Modelos/datos_pronostico_mensual.Rda")